lgit-cli 3.7.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (54) hide show
  1. lgit/__init__.py +75 -0
  2. lgit/__main__.py +8 -0
  3. lgit/analysis.py +326 -0
  4. lgit/api.py +1077 -0
  5. lgit/cache.py +338 -0
  6. lgit/changelog.py +523 -0
  7. lgit/cli.py +1104 -0
  8. lgit/compose.py +2110 -0
  9. lgit/config.py +437 -0
  10. lgit/diffing.py +384 -0
  11. lgit/errors.py +137 -0
  12. lgit/git.py +852 -0
  13. lgit/map_reduce.py +508 -0
  14. lgit/markdown_output.py +709 -0
  15. lgit/models.py +924 -0
  16. lgit/normalization.py +411 -0
  17. lgit/patch.py +784 -0
  18. lgit/profile.py +426 -0
  19. lgit/py.typed +0 -0
  20. lgit/repo.py +287 -0
  21. lgit/resources/__init__.py +1 -0
  22. lgit/resources/commit_types.json +242 -0
  23. lgit/resources/prompts/analysis/default.md +237 -0
  24. lgit/resources/prompts/analysis/markdown.md +112 -0
  25. lgit/resources/prompts/changelog/default.md +89 -0
  26. lgit/resources/prompts/changelog/markdown.md +60 -0
  27. lgit/resources/prompts/compose-bind/default.md +40 -0
  28. lgit/resources/prompts/compose-bind/markdown.md +41 -0
  29. lgit/resources/prompts/compose-intent/default.md +63 -0
  30. lgit/resources/prompts/compose-intent/markdown.md +59 -0
  31. lgit/resources/prompts/fast/default.md +46 -0
  32. lgit/resources/prompts/fast/markdown.md +51 -0
  33. lgit/resources/prompts/map/default.md +67 -0
  34. lgit/resources/prompts/map/markdown.md +63 -0
  35. lgit/resources/prompts/reduce/default.md +81 -0
  36. lgit/resources/prompts/reduce/markdown.md +68 -0
  37. lgit/resources/prompts/summary/default.md +74 -0
  38. lgit/resources/prompts/summary/markdown.md +77 -0
  39. lgit/resources/validation_data.json +1 -0
  40. lgit/rewrite.py +392 -0
  41. lgit/style.py +295 -0
  42. lgit/templates.py +385 -0
  43. lgit/testing/__init__.py +62 -0
  44. lgit/testing/compare.py +57 -0
  45. lgit/testing/fixture.py +386 -0
  46. lgit/testing/report.py +201 -0
  47. lgit/testing/runner.py +256 -0
  48. lgit/tokens.py +90 -0
  49. lgit/validation.py +545 -0
  50. lgit_cli-3.7.0.dist-info/METADATA +288 -0
  51. lgit_cli-3.7.0.dist-info/RECORD +54 -0
  52. lgit_cli-3.7.0.dist-info/WHEEL +4 -0
  53. lgit_cli-3.7.0.dist-info/entry_points.txt +2 -0
  54. lgit_cli-3.7.0.dist-info/licenses/LICENSE +21 -0
lgit/normalization.py ADDED
@@ -0,0 +1,411 @@
1
+ """Normalization utilities for conventional commit messages."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import math
6
+ import unicodedata
7
+ from dataclasses import FrozenInstanceError, is_dataclass, replace
8
+ from typing import Any
9
+
10
+ from .validation import is_past_tense_verb, present_to_past, split_verb_token, verb_stem
11
+
12
# Fallback detail-token budget used when the config object has no
# ``max_detail_tokens`` attribute.
_DEFAULT_MAX_DETAIL_TOKENS = 200
# Fallback summary length limit used when the config object has no
# ``summary_hard_limit`` attribute.
_DEFAULT_SUMMARY_HARD_LIMIT = 128
14
+
15
# Translation table applied to the raw text before NFKD normalization runs.
# It spells out the not-equal sign, vulgar fractions, and superscript /
# subscript digits as ASCII sequences.
_PRE_NFKD_REPLACEMENTS = str.maketrans(
    {
        "≠": "!=",
        # Vulgar fractions written with an ASCII slash.
        "½": "1/2",
        "¼": "1/4",
        "¾": "3/4",
        "⅓": "1/3",
        "⅔": "2/3",
        "⅕": "1/5",
        "⅖": "2/5",
        "⅗": "3/5",
        "⅘": "4/5",
        "⅙": "1/6",
        "⅚": "5/6",
        "⅛": "1/8",
        "⅜": "3/8",
        "⅝": "5/8",
        "⅞": "7/8",
        # Superscript digits become "^N"; subscript digits become "_N".
        **{glyph: f"^{digit}" for digit, glyph in enumerate("⁰¹²³⁴⁵⁶⁷⁸⁹")},
        **{glyph: f"_{digit}" for digit, glyph in enumerate("₀₁₂₃₄₅₆₇₈₉")},
    }
)
55
+
56
# Translation table applied after NFKD normalization.  It maps typographic
# punctuation, arrows, math symbols, bullets, check marks, selected Greek
# letters and special spaces to ASCII, and deletes zero-width characters.
_POST_NFKD_REPLACEMENTS = str.maketrans(
    {
        # Curly, low and angle quotes -> ASCII quotes.
        **dict.fromkeys("‘’‚‹›", "'"),
        **dict.fromkeys("“”„«»", '"'),
        # Hyphen-like characters -> "-", longer dashes -> "--".
        **dict.fromkeys("‐‑‒−", "-"),
        **dict.fromkeys("–—―", "--"),
        # Arrows.
        "→": "->",
        "←": "<-",
        "↔": "<->",
        "⇒": "=>",
        "⇐": "<=",
        "⇔": "<=>",
        "↑": "^",
        "↓": "v",
        # Comparison and arithmetic symbols.
        "≤": "<=",
        "≥": ">=",
        "≈": "~=",
        "≡": "==",
        "×": "x",
        "÷": "/",
        # Ellipses.
        **dict.fromkeys("…⋯⋮", "..."),
        # Bullet glyphs.
        **dict.fromkeys("•◦▪▫◆◇", "-"),
        # Check marks and crosses.
        **dict.fromkeys("✓✔", "v"),
        **dict.fromkeys("✗✘", "x"),
        # Greek letters spelled out by name.
        "λ": "lambda",
        "α": "alpha",
        "β": "beta",
        "γ": "gamma",
        "δ": "delta",
        "ε": "epsilon",
        "θ": "theta",
        "μ": "mu",
        "π": "pi",
        "σ": "sigma",
        "Σ": "Sigma",
        "Δ": "Delta",
        "Π": "Pi",
        # Non-ASCII spaces -> plain space (U+2000 through U+200A plus extras).
        **dict.fromkeys(map(chr, range(0x2000, 0x200B)), " "),
        **dict.fromkeys("\u00a0\u202f\u205f\u3000", " "),
        # Zero-width characters and the byte-order mark are removed.
        **dict.fromkeys("\u200b\u200c\u200d\ufeff", ""),
    }
)
136
+
137
+
138
def normalize_unicode(text: str) -> str:
    """Fold typographic Unicode in ``text`` down to plain equivalents.

    Three passes run in order: the pre-NFKD translation table, NFKD
    normalization, then the post-NFKD translation table.  ``text`` is coerced
    with ``str()`` first, so non-string inputs are accepted.
    """

    decomposed = unicodedata.normalize(
        "NFKD",
        str(text).translate(_PRE_NFKD_REPLACEMENTS),
    )
    return decomposed.translate(_POST_NFKD_REPLACEMENTS)
144
+
145
+
146
def estimate_tokens(text: str) -> int:
    """Approximate the token count of ``text`` as one token per four UTF-8 bytes.

    The byte count is divided by four and rounded up, so any non-empty string
    costs at least one token and the empty string costs zero.
    """

    encoded_size = len(text.encode("utf-8"))
    return math.ceil(encoded_size / 4)
150
+
151
+
152
def cap_details(details: list[str], max_tokens: int) -> None:
    """Trim ``details`` in place so the kept bullets fit an approximate token budget.

    Nothing changes when the list is empty or its estimated total already fits
    within ``max_tokens``.  Otherwise every bullet receives a keyword-based
    priority score, bullets are visited from highest to lowest score (equal
    scores keep their original relative order), and a bullet is kept whenever
    its estimated cost still fits in the remaining budget.  The survivors are
    written back in their original order.

    Args:
        details: Detail bullets; mutated in place.
        max_tokens: Budget in estimated tokens.  Once trimming starts, a
            negative budget is treated as zero.
    """

    if not details:
        return
    costs = [estimate_tokens(entry) for entry in details]
    if sum(costs) <= max_tokens:
        return

    urgent_words = ("security", "vulnerability", "exploit", "critical")
    keyword_weights = (
        (("breaking", "incompatible"), 90),
        (("performance", "faster", "optimization"), 80),
        (("fix", "bug"), 70),
        (("api", "interface", "public"), 50),
        (("user", "client"), 40),
        (("deprecated", "removed"), 35),
    )

    def priority(entry: str) -> int:
        # Matching is case-insensitive substring search; weights accumulate.
        text = entry.lower()
        points = 0
        if any(word in text for word in urgent_words) or ("fix" in text and "crash" in text):
            points += 100
        points += sum(
            weight
            for words, weight in keyword_weights
            if any(word in text for word in words)
        )
        # Longer bullets earn a small bonus, capped at 10 points.
        return points + min(_byte_len(entry) // 20, 10)

    scores = [priority(entry) for entry in details]

    remaining = max(0, int(max_tokens))
    kept: list[int] = []
    # Negating the score gives descending order while the stable sort keeps
    # ties in their original positions.
    for position in sorted(range(len(details)), key=lambda pos: -scores[pos]):
        cost = costs[position]
        if cost <= remaining:
            kept.append(position)
            remaining -= cost
    details[:] = [details[position] for position in sorted(kept)]
196
+
197
+
198
def normalize_summary_verb(summary: str, commit_type: str) -> str:
    """Rewrite a leading present-tense verb in ``summary`` as past tense.

    The stripped summary is returned unchanged when it is empty, when the first
    word is already past tense, or when the first word cannot be converted.
    A first word of the form ``re-<verb>`` has its inner verb converted.  For
    ``commit_type == "refactor"`` the past form ``refactored`` is replaced by
    ``restructured``.  When a rewrite happens, the remaining words are re-joined
    with single spaces.
    """

    text = str(summary).strip()
    if not text:
        return text

    head, *remaining_words = text.split()
    rest = " ".join(remaining_words)
    head_lower = head.lower()

    def prefer_restructured(past_form: str) -> str:
        # Refactor commits use "restructured" in place of "refactored".
        if commit_type == "refactor" and past_form == "refactored":
            return "restructured"
        return past_form

    if is_past_tense_verb(head_lower):
        swapped = prefer_restructured(head_lower)
        return text if swapped == head_lower else _join_first_rest(swapped, rest)

    token_parts = split_verb_token(head)
    if token_parts is None:
        return text
    raw_stem, suffix = token_parts
    stem = raw_stem.lower()
    if verb_stem(head) is None:
        return text
    # Only suffixes introduced by "-" or "/" are carried over to the result.
    if suffix and not suffix.startswith(("-", "/")):
        return text

    if stem == "re" and suffix.startswith("-"):
        after_dash = suffix[1:]
        # Index of the first character that is not an ASCII letter.
        letters_end = next(
            (
                pos
                for pos, char in enumerate(after_dash)
                if not (char.isascii() and char.isalpha())
            ),
            len(after_dash),
        )
        if letters_end == 0:
            return text
        inner_past = _past_for_presentish(after_dash[:letters_end].lower())
        if inner_past is None:
            return text
        trailing = after_dash[letters_end:]
        return _join_first_rest(f"re-{prefer_restructured(inner_past)}{trailing}", rest)

    past = _past_for_presentish(stem)
    if past is None:
        return text
    return _join_first_rest(f"{prefer_restructured(past)}{suffix}", rest)
249
+
250
+
251
def post_process_commit_message(msg: Any, config: Any | None = None) -> Any:
    """Return a normalized conventional commit, rebuilding frozen dataclasses.

    The summary is Unicode-normalized, collapsed onto a single line, stripped
    of trailing ``.``/``;``/``:``, given a lowercase first letter (unless the
    first token is all caps), and has its leading verb converted to past tense
    when known.  Body items are Unicode-normalized, stripped of leading bullet
    markers and trailing ``.``/``;``/``,``, capitalized, terminated with a
    period, and then capped to the configured detail-token budget; items that
    end up empty are dropped.  Footers are only Unicode-normalized.

    Args:
        msg: Commit object exposing ``summary``, ``body`` and ``footers`` (and
            ``commit_type`` or ``type``).
        config: Optional settings object; ``summary_hard_limit`` and
            ``max_detail_tokens`` are read from it when present.

    Returns:
        ``msg`` itself after in-place assignment, or, when assignment is
        rejected and ``msg`` is a dataclass, a copy built with
        ``dataclasses.replace`` whose ``body`` and ``footers`` are tuples.

    Raises:
        AttributeError: If the attributes cannot be assigned and ``msg`` is
            not a dataclass.
    """

    summary = normalize_unicode(_summary_text(msg))
    body = [normalize_unicode(str(item)) for item in getattr(msg, "body", ())]
    footers = [normalize_unicode(str(item)) for item in getattr(msg, "footers", ())]

    # Collapse line breaks and runs of whitespace so the summary is one line.
    summary = " ".join(summary.replace("\r", " ").replace("\n", " ").split())
    summary = _trim_summary_suffix(summary.strip()).strip()
    summary = _lowercase_first_token(summary)
    summary = normalize_summary_verb(summary, _commit_type_text(msg))
    summary = _lowercase_first_token(summary.strip()).rstrip(".").strip()

    normalized_summary = _coerce_summary(getattr(msg, "summary", ""), summary, _summary_hard_limit(config))

    cleaned_body: list[str] = []
    for item in body:
        cleaned = _strip_body_prefix(item.replace("\r", " ").replace("\n", " "))
        cleaned = _trim_body_suffix(" ".join(cleaned.split()).strip()).strip()
        if not cleaned:
            continue
        cleaned = _capitalize_first_letter(cleaned)
        if not cleaned.endswith("."):
            cleaned += "."
        cleaned_body.append(cleaned)
    cap_details(cleaned_body, int(getattr(config, "max_detail_tokens", _DEFAULT_MAX_DETAIL_TOKENS)))

    try:
        msg.summary = normalized_summary
        msg.body = cleaned_body
        msg.footers = footers
    # Parenthesized tuple: the bare ``except A, B:`` form only parses on
    # Python 3.14+, while this spelling is valid on every Python 3 release.
    except (AttributeError, FrozenInstanceError):
        if is_dataclass(msg):
            return replace(msg, summary=normalized_summary, body=tuple(cleaned_body), footers=tuple(footers))
        raise
    return msg
287
+
288
+
289
def format_commit_message(msg: Any) -> str:
    """Render ``msg`` as conventional-commit text.

    The output is a ``type(scope): summary`` header (the parenthesized scope is
    omitted when the scope is empty or missing), optionally followed by the
    body as ``- `` bullets and then the footers.  Sections are separated by a
    blank line; body items and footers that are blank after stripping are
    skipped.
    """

    commit_type = _commit_type_text(msg)
    scope = _scope_text(msg)
    header = commit_type + (f"({scope})" if scope else "") + f": {_summary_text(msg)}"
    sections = [header]

    bullets = [f"- {text}" for text in map(str, getattr(msg, "body", ())) if text.strip()]
    if bullets:
        sections.append("\n".join(bullets))

    footer_lines = [text for text in map(str, getattr(msg, "footers", ())) if text.strip()]
    if footer_lines:
        sections.append("\n".join(footer_lines))

    return "\n\n".join(sections)
305
+
306
+
307
def _past_for_presentish(stem: str) -> str | None:
    """Look up the past tense of ``stem``, also trying it without a present-tense ending.

    Candidates are tried in order: the stem itself, the stem without a final
    ``s``, without a final ``es``, and with a final ``ies`` replaced by ``y``.
    The first candidate that ``present_to_past`` recognizes wins; ``None`` is
    returned when none does.
    """

    candidates = [stem]
    if stem.endswith("s"):
        candidates.append(stem[:-1])
    if stem.endswith("es"):
        candidates.append(stem[:-2])
    if stem.endswith("ies"):
        candidates.append(f"{stem[:-3]}y")

    for candidate in candidates:
        past = present_to_past(candidate)
        if past is not None:
            return past
    return None
324
+
325
+
326
def _byte_len(value: str) -> int:
    """Return the number of bytes ``value`` occupies when encoded as UTF-8."""
    encoded = value.encode("utf-8")
    return len(encoded)
328
+
329
+
330
def _join_first_rest(first: str, rest: str) -> str:
    """Join ``first`` and ``rest`` with one space, or return ``first`` alone when ``rest`` is empty."""
    if rest:
        return f"{first} {rest}"
    return first
332
+
333
+
334
def _lowercase_first_token(text: str) -> str:
    """Lowercase the first character of ``text`` unless its first token is all caps.

    Empty text, text whose first character is not uppercase, and text whose
    first token consists solely of uppercase letters are returned unchanged.
    """
    if text and text[0].isupper() and not _first_token_is_all_caps(text):
        return text[0].lower() + text[1:]
    return text
341
+
342
+
343
def _first_token_is_all_caps(text: str) -> bool:
    """Report whether the first whitespace-delimited token is written in all caps.

    Only alphabetic characters are inspected; a token without any letters (or
    text without any token) yields ``False``.
    """
    tokens = text.split(maxsplit=1)
    if not tokens:
        return False

    saw_letter = False
    for character in tokens[0]:
        if not character.isalpha():
            continue
        if not character.isupper():
            return False
        saw_letter = True
    return saw_letter
350
+
351
+
352
def _capitalize_first_letter(text: str) -> str:
    """Uppercase the first character of ``text`` when it is a lowercase letter."""
    leading = text[:1]
    if not leading.islower():
        return text
    return leading.upper() + text[1:]
356
+
357
+
358
def _trim_summary_suffix(text: str) -> str:
    """Drop any trailing run of ``.``, ``;`` and ``:`` characters from ``text``."""
    trailing_punctuation = ".;:"
    return text.rstrip(trailing_punctuation)
360
+
361
+
362
def _strip_body_prefix(text: str) -> str:
    """Remove surrounding whitespace and any leading run of bullet markers.

    The marker characters are ``•``, ``-``, ``*`` and ``+``; whitespace left
    over after removing them is stripped as well.
    """
    bullet_markers = "•-*+"
    without_markers = text.strip().lstrip(bullet_markers)
    return without_markers.strip()
365
+
366
+
367
def _trim_body_suffix(text: str) -> str:
    """Drop any trailing run of ``.``, ``;`` and ``,`` characters from ``text``."""
    trailing_punctuation = ".;,"
    return text.rstrip(trailing_punctuation)
369
+
370
+
371
def _coerce_summary(current: Any, value: str, max_length: int) -> Any:
    """Wrap ``value`` in the same kind of summary object as ``current``.

    A plain-string ``current`` yields ``value`` unchanged.  Otherwise the
    ``from_raw`` callable on ``type(current)`` is used when it exists; failing
    that, ``CommitSummary.from_raw`` from the package's ``models`` module is
    used, and ``value`` is returned as-is if that module cannot be imported.
    """
    if isinstance(current, str):
        return value

    builder = getattr(type(current), "from_raw", None)
    if not callable(builder):
        try:
            from .models import CommitSummary
        except ImportError:
            return value
        builder = CommitSummary.from_raw
    return builder(value, max_length=max_length)
382
+
383
+
384
def _summary_hard_limit(config: Any | None) -> int:
    """Read ``summary_hard_limit`` from ``config`` as an int, using the module default when absent."""
    configured = getattr(config, "summary_hard_limit", _DEFAULT_SUMMARY_HARD_LIMIT)
    return int(configured)
386
+
387
+
388
def _commit_type_text(msg: Any) -> str:
    """Return the commit type of ``msg`` as stripped, lowercase text.

    ``msg.commit_type`` is preferred, then ``msg.type``; an empty string is
    used when neither attribute exists.
    """
    fallback = getattr(msg, "type", "")
    raw_type = getattr(msg, "commit_type", fallback)
    return str(raw_type).strip().lower()
390
+
391
+
392
def _scope_text(msg: Any) -> str | None:
    """Return ``msg.scope`` as stripped, lowercase text, or ``None`` when it is missing or ``None``."""
    raw_scope = getattr(msg, "scope", None)
    return None if raw_scope is None else str(raw_scope).strip().lower()
397
+
398
+
399
def _summary_text(msg: Any) -> str:
    """Return the summary of ``msg`` as a string.

    When ``msg.summary`` has a ``value`` attribute, that attribute is used;
    otherwise the summary object itself is converted.  A missing summary gives
    an empty string.
    """
    raw_summary = getattr(msg, "summary", "")
    unwrapped = getattr(raw_summary, "value", raw_summary)
    return str(unwrapped)
402
+
403
+
404
# Public API of this module; helpers prefixed with an underscore stay private.
__all__ = [
    "cap_details",
    "estimate_tokens",
    "format_commit_message",
    "normalize_summary_verb",
    "normalize_unicode",
    "post_process_commit_message",
]