lgit-cli 3.7.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- lgit/__init__.py +75 -0
- lgit/__main__.py +8 -0
- lgit/analysis.py +326 -0
- lgit/api.py +1077 -0
- lgit/cache.py +338 -0
- lgit/changelog.py +523 -0
- lgit/cli.py +1104 -0
- lgit/compose.py +2110 -0
- lgit/config.py +437 -0
- lgit/diffing.py +384 -0
- lgit/errors.py +137 -0
- lgit/git.py +852 -0
- lgit/map_reduce.py +508 -0
- lgit/markdown_output.py +709 -0
- lgit/models.py +924 -0
- lgit/normalization.py +411 -0
- lgit/patch.py +784 -0
- lgit/profile.py +426 -0
- lgit/py.typed +0 -0
- lgit/repo.py +287 -0
- lgit/resources/__init__.py +1 -0
- lgit/resources/commit_types.json +242 -0
- lgit/resources/prompts/analysis/default.md +237 -0
- lgit/resources/prompts/analysis/markdown.md +112 -0
- lgit/resources/prompts/changelog/default.md +89 -0
- lgit/resources/prompts/changelog/markdown.md +60 -0
- lgit/resources/prompts/compose-bind/default.md +40 -0
- lgit/resources/prompts/compose-bind/markdown.md +41 -0
- lgit/resources/prompts/compose-intent/default.md +63 -0
- lgit/resources/prompts/compose-intent/markdown.md +59 -0
- lgit/resources/prompts/fast/default.md +46 -0
- lgit/resources/prompts/fast/markdown.md +51 -0
- lgit/resources/prompts/map/default.md +67 -0
- lgit/resources/prompts/map/markdown.md +63 -0
- lgit/resources/prompts/reduce/default.md +81 -0
- lgit/resources/prompts/reduce/markdown.md +68 -0
- lgit/resources/prompts/summary/default.md +74 -0
- lgit/resources/prompts/summary/markdown.md +77 -0
- lgit/resources/validation_data.json +1 -0
- lgit/rewrite.py +392 -0
- lgit/style.py +295 -0
- lgit/templates.py +385 -0
- lgit/testing/__init__.py +62 -0
- lgit/testing/compare.py +57 -0
- lgit/testing/fixture.py +386 -0
- lgit/testing/report.py +201 -0
- lgit/testing/runner.py +256 -0
- lgit/tokens.py +90 -0
- lgit/validation.py +545 -0
- lgit_cli-3.7.0.dist-info/METADATA +288 -0
- lgit_cli-3.7.0.dist-info/RECORD +54 -0
- lgit_cli-3.7.0.dist-info/WHEEL +4 -0
- lgit_cli-3.7.0.dist-info/entry_points.txt +2 -0
- lgit_cli-3.7.0.dist-info/licenses/LICENSE +21 -0
lgit/normalization.py
ADDED
|
@@ -0,0 +1,411 @@
|
|
|
1
|
+
"""Normalization utilities for conventional commit messages."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import math
|
|
6
|
+
import unicodedata
|
|
7
|
+
from dataclasses import FrozenInstanceError, is_dataclass, replace
|
|
8
|
+
from typing import Any
|
|
9
|
+
|
|
10
|
+
from .validation import is_past_tense_verb, present_to_past, split_verb_token, verb_stem
|
|
11
|
+
|
|
12
|
+
# Fallback body budget (in estimated tokens) used when the config object does
# not provide ``max_detail_tokens``.
_DEFAULT_MAX_DETAIL_TOKENS = 200
# Fallback summary length limit used when the config object does not provide
# ``summary_hard_limit``.
_DEFAULT_SUMMARY_HARD_LIMIT = 128
|
|
14
|
+
|
|
15
|
+
# Translation table applied BEFORE NFKD normalization: spells out "≠", the
# vulgar fractions, and superscript/subscript digits as ASCII text.
_PRE_NFKD_REPLACEMENTS = str.maketrans(
    {
        "≠": "!=",
        # Vulgar fractions.
        "½": "1/2",
        "¼": "1/4",
        "¾": "3/4",
        "⅓": "1/3",
        "⅔": "2/3",
        "⅕": "1/5",
        "⅖": "2/5",
        "⅗": "3/5",
        "⅘": "4/5",
        "⅙": "1/6",
        "⅚": "5/6",
        "⅛": "1/8",
        "⅜": "3/8",
        "⅝": "5/8",
        "⅞": "7/8",
        # Superscript digits 0-9 become "^<digit>".
        **{glyph: f"^{digit}" for digit, glyph in enumerate("⁰¹²³⁴⁵⁶⁷⁸⁹")},
        # Subscript digits 0-9 become "_<digit>".
        **{glyph: f"_{digit}" for digit, glyph in enumerate("₀₁₂₃₄₅₆₇₈₉")},
    }
)
|
|
55
|
+
|
|
56
|
+
# Translation table applied AFTER NFKD normalization: folds typographic
# punctuation, arrows, math symbols, bullets, marks, a few Greek letters and
# unusual space characters into plain ASCII text.
_POST_NFKD_REPLACEMENTS = str.maketrans(
    {
        # Quotation marks.
        **dict.fromkeys("‘’‚‹›", "'"),
        **dict.fromkeys("“”„«»", '"'),
        # Hyphen-like characters become "-"; the longer dashes become "--".
        **dict.fromkeys("‐‑‒−", "-"),
        **dict.fromkeys("–—―", "--"),
        # Arrows.
        "→": "->",
        "←": "<-",
        "↔": "<->",
        "⇒": "=>",
        "⇐": "<=",
        "⇔": "<=>",
        "↑": "^",
        "↓": "v",
        # Comparison and arithmetic symbols.
        "≤": "<=",
        "≥": ">=",
        "≈": "~=",
        "≡": "==",
        "×": "x",
        "÷": "/",
        # Ellipses.
        **dict.fromkeys("…⋯⋮", "..."),
        # Bullets.
        **dict.fromkeys("•◦▪▫◆◇", "-"),
        # Check marks and crosses.
        **dict.fromkeys("✓✔", "v"),
        **dict.fromkeys("✗✘", "x"),
        # Greek letters are spelled out.
        "λ": "lambda",
        "α": "alpha",
        "β": "beta",
        "γ": "gamma",
        "δ": "delta",
        "ε": "epsilon",
        "θ": "theta",
        "μ": "mu",
        "π": "pi",
        "σ": "sigma",
        "Σ": "Sigma",
        "Δ": "Delta",
        "Π": "Pi",
        # Space variants collapse to a regular space.
        **dict.fromkeys(
            "\u00a0\u2000\u2001\u2002\u2003\u2004\u2005\u2006"
            "\u2007\u2008\u2009\u200a\u202f\u205f\u3000",
            " ",
        ),
        # Zero-width characters and the byte-order mark are deleted.
        **dict.fromkeys("\u200b\u200c\u200d\ufeff", ""),
    }
)
|
|
136
|
+
|
|
137
|
+
|
|
138
|
+
def normalize_unicode(text: str) -> str:
    """Fold typographic Unicode in ``text`` into plain equivalents.

    Three passes run in order: the pre-NFKD translation table, NFKD
    normalization, and finally the post-NFKD translation table. ``text`` is
    coerced with ``str()`` first.
    """

    prepared = str(text).translate(_PRE_NFKD_REPLACEMENTS)
    return unicodedata.normalize("NFKD", prepared).translate(_POST_NFKD_REPLACEMENTS)
|
|
144
|
+
|
|
145
|
+
|
|
146
|
+
def estimate_tokens(text: str) -> int:
    """Approximate the token count of ``text`` as one token per four UTF-8 bytes.

    The quotient is rounded up, so any non-empty string costs at least one
    token and the empty string costs zero.
    """

    utf8_size = len(text.encode("utf-8"))
    return math.ceil(utf8_size / 4)
|
|
150
|
+
|
|
151
|
+
|
|
152
|
+
def cap_details(details: list[str], max_tokens: int) -> None:
    """Shrink ``details`` in place so its estimated token cost fits ``max_tokens``.

    Nothing happens when the list is empty or already within budget.
    Otherwise every bullet receives a keyword-based priority score, bullets
    are admitted greedily from the highest score down for as long as they
    fit in the remaining budget, and the survivors are written back in their
    original relative order.
    """

    if not details:
        return
    costs = [estimate_tokens(bullet) for bullet in details]
    if sum(costs) <= max_tokens:
        return

    def mentions(haystack: str, *needles: str) -> bool:
        return any(needle in haystack for needle in needles)

    ranked: list[tuple[int, int, int]] = []
    for position, bullet in enumerate(details):
        text = bullet.lower()
        priority = 0
        if mentions(text, "security", "vulnerability", "exploit", "critical") or (
            "fix" in text and "crash" in text
        ):
            priority += 100
        if mentions(text, "breaking", "incompatible"):
            priority += 90
        if mentions(text, "performance", "faster", "optimization"):
            priority += 80
        if mentions(text, "fix", "bug"):
            priority += 70
        if mentions(text, "api", "interface", "public"):
            priority += 50
        if mentions(text, "user", "client"):
            priority += 40
        if mentions(text, "deprecated", "removed"):
            priority += 35
        # Longer bullets earn a small bonus, capped at 10 points.
        priority += min(_byte_len(bullet) // 20, 10)
        ranked.append((priority, position, costs[position]))

    remaining = max(0, int(max_tokens))
    survivors: list[int] = []
    # Highest priority first; equal priorities keep their input order.
    for _priority, position, cost in sorted(ranked, key=lambda row: (-row[0], row[1])):
        if cost <= remaining:
            survivors.append(position)
            remaining -= cost
    details[:] = [details[position] for position in sorted(survivors)]
|
|
196
|
+
|
|
197
|
+
|
|
198
|
+
def normalize_summary_verb(summary: str, commit_type: str) -> str:
    """Rewrite the leading verb of ``summary`` into past tense when it is known.

    The stripped summary is returned untouched whenever the first word is
    already past tense, cannot be split into a verb stem, or has no known
    past-tense form. For ``refactor`` commits the word "refactored" is
    replaced by "restructured". A ``re-<verb>`` first word is handled by
    converting the inner verb and keeping the ``re-`` prefix.
    """

    stripped = str(summary).strip()
    if not stripped:
        return stripped

    head, *remaining_words = stripped.split()
    rest = " ".join(remaining_words)
    head_lower = head.lower()

    def avoid_refactored(word: str) -> str:
        # "refactor: refactored ..." would repeat the commit type.
        if commit_type == "refactor" and word == "refactored":
            return "restructured"
        return word

    if is_past_tense_verb(head_lower):
        if commit_type == "refactor" and head_lower == "refactored":
            return _join_first_rest("restructured", rest)
        return stripped

    pieces = split_verb_token(head)
    if pieces is None:
        return stripped
    raw_stem, suffix = pieces
    stem = raw_stem.lower()
    if verb_stem(head) is None:
        return stripped
    # Only "-..." and "/..." suffixes are carried over after the verb.
    if suffix and not suffix.startswith(("-", "/")):
        return stripped

    if stem == "re" and suffix.startswith("-"):
        after_dash = suffix[1:]
        # Measure the run of ASCII letters that follows "re-".
        letters = 0
        while (
            letters < len(after_dash)
            and after_dash[letters].isascii()
            and after_dash[letters].isalpha()
        ):
            letters += 1
        if letters == 0:
            return stripped
        inner_past = _past_for_presentish(after_dash[:letters].lower())
        if inner_past is None:
            return stripped
        tail = after_dash[letters:]
        return _join_first_rest(f"re-{avoid_refactored(inner_past)}{tail}", rest)

    past = _past_for_presentish(stem)
    if past is None:
        return stripped
    return _join_first_rest(f"{avoid_refactored(past)}{suffix}", rest)
|
|
249
|
+
|
|
250
|
+
|
|
251
|
+
def post_process_commit_message(msg: Any, config: Any | None = None) -> Any:
    """Return a normalized conventional commit, rebuilding frozen dataclasses.

    Summary: Unicode-normalized, whitespace (including newlines) collapsed to
    single spaces, trailing ``.;:`` trimmed, first character lowercased
    unless the first token is all caps, leading verb normalized with
    ``normalize_summary_verb``, trailing periods removed, then coerced back
    to the type of the original summary via ``_coerce_summary``.

    Body: each item is Unicode-normalized, flattened to one line, stripped of
    leading bullet characters and trailing ``.;,``, dropped when empty,
    capitalized, terminated with a period, and finally capped with
    ``cap_details``.

    Footers: Unicode-normalized only.

    Args:
        msg: Commit object exposing ``summary``, ``body`` and ``footers``
            (missing ``body``/``footers`` are treated as empty).
        config: Optional object providing ``max_detail_tokens`` and
            ``summary_hard_limit``; module defaults are used when absent.

    Returns:
        ``msg`` itself when its attributes could be assigned; otherwise, for
        a dataclass instance, a copy built with ``dataclasses.replace`` whose
        ``body`` and ``footers`` are tuples.

    Raises:
        AttributeError: Re-raised (including ``FrozenInstanceError``) when
            assignment fails and ``msg`` is not a dataclass.
    """

    summary = normalize_unicode(_summary_text(msg))
    body = [normalize_unicode(str(item)) for item in getattr(msg, "body", ())]
    footers = [normalize_unicode(str(item)) for item in getattr(msg, "footers", ())]

    summary = " ".join(summary.replace("\r", " ").replace("\n", " ").split())
    summary = _trim_summary_suffix(summary.strip()).strip()
    summary = _lowercase_first_token(summary)
    summary = normalize_summary_verb(summary, _commit_type_text(msg))
    # The verb rewrite may change the first word, so lowercase again before
    # dropping any trailing periods.
    summary = _lowercase_first_token(summary.strip()).rstrip(".").strip()

    normalized_summary = _coerce_summary(getattr(msg, "summary", ""), summary, _summary_hard_limit(config))

    cleaned_body: list[str] = []
    for item in body:
        cleaned = _strip_body_prefix(item.replace("\r", " ").replace("\n", " "))
        cleaned = _trim_body_suffix(" ".join(cleaned.split()).strip()).strip()
        if not cleaned:
            continue
        cleaned = _capitalize_first_letter(cleaned)
        if not cleaned.endswith("."):
            cleaned += "."
        cleaned_body.append(cleaned)
    cap_details(cleaned_body, int(getattr(config, "max_detail_tokens", _DEFAULT_MAX_DETAIL_TOKENS)))

    try:
        msg.summary = normalized_summary
        msg.body = cleaned_body
        msg.footers = footers
    # Parenthesized tuple: the bare ``except A, B:`` form is a SyntaxError on
    # Python versions before 3.14.
    except (AttributeError, FrozenInstanceError):
        if is_dataclass(msg):
            return replace(msg, summary=normalized_summary, body=tuple(cleaned_body), footers=tuple(footers))
        raise
    return msg
|
|
287
|
+
|
|
288
|
+
|
|
289
|
+
def format_commit_message(msg: Any) -> str:
    """Render a conventional commit object as commit message text.

    The header is ``type(scope): summary`` (the parenthesized scope is
    omitted when empty). Non-blank body items follow as ``- `` bullets and
    non-blank footers as plain lines; sections are separated by one blank
    line.
    """

    commit_type = _commit_type_text(msg)
    scope = _scope_text(msg)
    header_prefix = f"{commit_type}({scope})" if scope else commit_type
    sections = [f"{header_prefix}: {_summary_text(msg)}"]

    body_items = [text for text in map(str, getattr(msg, "body", ())) if text.strip()]
    if body_items:
        sections.append("\n".join(f"- {text}" for text in body_items))

    footer_items = [text for text in map(str, getattr(msg, "footers", ())) if text.strip()]
    if footer_items:
        sections.append("\n".join(footer_items))

    return "\n\n".join(sections)
|
|
305
|
+
|
|
306
|
+
|
|
307
|
+
def _past_for_presentish(stem: str) -> str | None:
    """Look up the past tense of ``stem``, tolerating third-person endings.

    The stem is tried verbatim first. If that fails, the endings ``s``,
    ``es`` and ``ies`` (the last one replaced by ``y``) are removed in that
    order and each shortened form is looked up. Returns ``None`` when no
    lookup succeeds.
    """

    direct = present_to_past(stem)
    if direct is not None:
        return direct
    for ending, substitute in (("s", ""), ("es", ""), ("ies", "y")):
        if not stem.endswith(ending):
            continue
        candidate = present_to_past(f"{stem[: -len(ending)]}{substitute}")
        if candidate is not None:
            return candidate
    return None
|
|
324
|
+
|
|
325
|
+
|
|
326
|
+
def _byte_len(value: str) -> int:
    """Return the size of ``value`` in bytes once encoded as UTF-8."""
    encoded = value.encode("utf-8")
    return len(encoded)
|
|
328
|
+
|
|
329
|
+
|
|
330
|
+
def _join_first_rest(first: str, rest: str) -> str:
    """Join ``first`` and ``rest`` with one space, or return ``first`` alone when ``rest`` is empty."""
    if rest:
        return f"{first} {rest}"
    return first
|
|
332
|
+
|
|
333
|
+
|
|
334
|
+
def _lowercase_first_token(text: str) -> str:
    """Lowercase the first character of ``text`` unless its first token is all caps.

    Empty text and text whose first token consists only of uppercase letters
    are returned unchanged.
    """
    if not text or _first_token_is_all_caps(text):
        return text
    head, tail = text[0], text[1:]
    return f"{head.lower()}{tail}" if head.isupper() else text
|
|
341
|
+
|
|
342
|
+
|
|
343
|
+
def _first_token_is_all_caps(text: str) -> bool:
    """Tell whether the first whitespace-delimited token is written in all caps.

    Only alphabetic characters are considered; a token without any letters
    (or text without any token) yields ``False``.
    """
    tokens = text.split(maxsplit=1)
    if not tokens:
        return False
    saw_letter = False
    for character in tokens[0]:
        if not character.isalpha():
            continue
        if not character.isupper():
            return False
        saw_letter = True
    return saw_letter
|
|
350
|
+
|
|
351
|
+
|
|
352
|
+
def _capitalize_first_letter(text: str) -> str:
    """Uppercase the first character of ``text`` when it is a lowercase letter."""
    if not text or not text[0].islower():
        return text
    return text[0].upper() + text[1:]
|
|
356
|
+
|
|
357
|
+
|
|
358
|
+
def _trim_summary_suffix(text: str) -> str:
    """Remove any trailing run of ``.``, ``;`` and ``:`` characters from ``text``."""
    trailing_punctuation = ".;:"
    return text.rstrip(trailing_punctuation)
|
|
360
|
+
|
|
361
|
+
|
|
362
|
+
def _strip_body_prefix(text: str) -> str:
    """Remove surrounding whitespace and any leading bullet characters (``•``, ``-``, ``*``, ``+``)."""
    trimmed = text.strip()
    without_bullet = trimmed.lstrip("•-*+")
    return without_bullet.strip()
|
|
365
|
+
|
|
366
|
+
|
|
367
|
+
def _trim_body_suffix(text: str) -> str:
    """Remove any trailing run of ``.``, ``;`` and ``,`` characters from ``text``."""
    trailing_punctuation = ".;,"
    return text.rstrip(trailing_punctuation)
|
|
369
|
+
|
|
370
|
+
|
|
371
|
+
def _coerce_summary(current: Any, value: str, max_length: int) -> Any:
    """Wrap ``value`` so it matches the kind of the existing summary ``current``.

    A plain-string summary stays a plain string. Otherwise the ``from_raw``
    factory of ``type(current)`` is used when it is callable, falling back
    to ``CommitSummary.from_raw`` from ``.models``; if that import fails the
    raw ``value`` is returned.
    """
    if isinstance(current, str):
        return value
    builder = getattr(type(current), "from_raw", None)
    if not callable(builder):
        try:
            from .models import CommitSummary
        except ImportError:
            return value
        builder = CommitSummary.from_raw
    return builder(value, max_length=max_length)
|
|
382
|
+
|
|
383
|
+
|
|
384
|
+
def _summary_hard_limit(config: Any | None) -> int:
    """Read ``config.summary_hard_limit`` as an int, using the module default when the attribute is missing."""
    configured = getattr(config, "summary_hard_limit", _DEFAULT_SUMMARY_HARD_LIMIT)
    return int(configured)
|
|
386
|
+
|
|
387
|
+
|
|
388
|
+
def _commit_type_text(msg: Any) -> str:
    """Return the commit type as stripped, lowercase text.

    Reads ``msg.commit_type``, falling back to ``msg.type`` and then to the
    empty string.
    """
    fallback = getattr(msg, "type", "")
    raw_type = getattr(msg, "commit_type", fallback)
    return str(raw_type).strip().lower()
|
|
390
|
+
|
|
391
|
+
|
|
392
|
+
def _scope_text(msg: Any) -> str | None:
    """Return ``msg.scope`` as stripped, lowercase text, or ``None`` when it is missing or ``None``."""
    raw_scope = getattr(msg, "scope", None)
    return None if raw_scope is None else str(raw_scope).strip().lower()
|
|
397
|
+
|
|
398
|
+
|
|
399
|
+
def _summary_text(msg: Any) -> str:
    """Return the summary of ``msg`` as a string.

    When the summary object has a ``value`` attribute that attribute is
    used; otherwise the summary itself is stringified. A missing summary
    yields the empty string.
    """
    raw_summary = getattr(msg, "summary", "")
    unwrapped = getattr(raw_summary, "value", raw_summary)
    return str(unwrapped)
|
|
402
|
+
|
|
403
|
+
|
|
404
|
+
# Public API of this module.
__all__ = [
    "cap_details", "estimate_tokens", "format_commit_message",
    "normalize_summary_verb", "normalize_unicode", "post_process_commit_message",
]
|