lgit-cli 3.7.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (54) hide show
  1. lgit/__init__.py +75 -0
  2. lgit/__main__.py +8 -0
  3. lgit/analysis.py +326 -0
  4. lgit/api.py +1077 -0
  5. lgit/cache.py +338 -0
  6. lgit/changelog.py +523 -0
  7. lgit/cli.py +1104 -0
  8. lgit/compose.py +2110 -0
  9. lgit/config.py +437 -0
  10. lgit/diffing.py +384 -0
  11. lgit/errors.py +137 -0
  12. lgit/git.py +852 -0
  13. lgit/map_reduce.py +508 -0
  14. lgit/markdown_output.py +709 -0
  15. lgit/models.py +924 -0
  16. lgit/normalization.py +411 -0
  17. lgit/patch.py +784 -0
  18. lgit/profile.py +426 -0
  19. lgit/py.typed +0 -0
  20. lgit/repo.py +287 -0
  21. lgit/resources/__init__.py +1 -0
  22. lgit/resources/commit_types.json +242 -0
  23. lgit/resources/prompts/analysis/default.md +237 -0
  24. lgit/resources/prompts/analysis/markdown.md +112 -0
  25. lgit/resources/prompts/changelog/default.md +89 -0
  26. lgit/resources/prompts/changelog/markdown.md +60 -0
  27. lgit/resources/prompts/compose-bind/default.md +40 -0
  28. lgit/resources/prompts/compose-bind/markdown.md +41 -0
  29. lgit/resources/prompts/compose-intent/default.md +63 -0
  30. lgit/resources/prompts/compose-intent/markdown.md +59 -0
  31. lgit/resources/prompts/fast/default.md +46 -0
  32. lgit/resources/prompts/fast/markdown.md +51 -0
  33. lgit/resources/prompts/map/default.md +67 -0
  34. lgit/resources/prompts/map/markdown.md +63 -0
  35. lgit/resources/prompts/reduce/default.md +81 -0
  36. lgit/resources/prompts/reduce/markdown.md +68 -0
  37. lgit/resources/prompts/summary/default.md +74 -0
  38. lgit/resources/prompts/summary/markdown.md +77 -0
  39. lgit/resources/validation_data.json +1 -0
  40. lgit/rewrite.py +392 -0
  41. lgit/style.py +295 -0
  42. lgit/templates.py +385 -0
  43. lgit/testing/__init__.py +62 -0
  44. lgit/testing/compare.py +57 -0
  45. lgit/testing/fixture.py +386 -0
  46. lgit/testing/report.py +201 -0
  47. lgit/testing/runner.py +256 -0
  48. lgit/tokens.py +90 -0
  49. lgit/validation.py +545 -0
  50. lgit_cli-3.7.0.dist-info/METADATA +288 -0
  51. lgit_cli-3.7.0.dist-info/RECORD +54 -0
  52. lgit_cli-3.7.0.dist-info/WHEEL +4 -0
  53. lgit_cli-3.7.0.dist-info/entry_points.txt +2 -0
  54. lgit_cli-3.7.0.dist-info/licenses/LICENSE +21 -0
lgit/validation.py ADDED
@@ -0,0 +1,545 @@
1
+ """Commit-message validation helpers.
2
+
3
+ The lookup tables in this module are loaded from package resources so installed
4
+ CLI behavior does not depend on the source checkout layout.
5
+ """
6
+
7
+ from __future__ import annotations
8
+
9
+ import json
10
+ from collections.abc import Iterable
11
+ from dataclasses import dataclass
12
+ from functools import cache
13
+ from importlib import resources
14
+ from pathlib import PurePosixPath
15
+ from typing import Any, Literal
16
+
17
+ IssueSeverity = Literal["error", "warning"]
18
+
19
+ _DEFAULT_GUIDELINE = 72
20
+ _DEFAULT_SOFT_LIMIT = 96
21
+ _DEFAULT_HARD_LIMIT = 128
22
+
23
+ _FALLBACK_TYPES_ORDERED = (
24
+ "feat",
25
+ "fix",
26
+ "refactor",
27
+ "docs",
28
+ "test",
29
+ "chore",
30
+ "style",
31
+ "perf",
32
+ "build",
33
+ "ci",
34
+ "revert",
35
+ "deps",
36
+ "security",
37
+ "config",
38
+ "ux",
39
+ "release",
40
+ "hotfix",
41
+ "infra",
42
+ "init",
43
+ "merge",
44
+ "hack",
45
+ "wip",
46
+ )
47
+ _FALLBACK_TYPES = frozenset(_FALLBACK_TYPES_ORDERED)
48
+
49
+
50
+ @dataclass(slots=True, frozen=True)
51
+ class ValidationIssue:
52
+ """One structured validation diagnostic."""
53
+
54
+ severity: IssueSeverity
55
+ field: str
56
+ code: str
57
+ message: str
58
+ value: str | None = None
59
+
60
+
61
+ @dataclass(slots=True, frozen=True)
62
+ class ValidationReport:
63
+ """Structured validation result with separate errors and warnings."""
64
+
65
+ errors: tuple[ValidationIssue, ...] = ()
66
+ warnings: tuple[ValidationIssue, ...] = ()
67
+
68
+ @property
69
+ def ok(self) -> bool:
70
+ """Return whether validation found no blocking errors."""
71
+
72
+ return not self.errors
73
+
74
+ def __bool__(self) -> bool:
75
+ """Treat the report as true only when it has no errors."""
76
+
77
+ return self.ok
78
+
79
+
80
+ @dataclass(slots=True, frozen=True)
81
+ class _ValidationData:
82
+ past_tense: dict[str, str]
83
+ irregular_past: frozenset[str]
84
+ ed_blocklist: frozenset[str]
85
+ d_blocklist: frozenset[str]
86
+ code_extensions: frozenset[str]
87
+ doc_extensions: frozenset[str]
88
+ filler_words: tuple[str, ...]
89
+ meta_phrases: tuple[str, ...]
90
+ body_present_tense: frozenset[str]
91
+
92
+
93
+ @dataclass(slots=True)
94
+ class _IssueBuilder:
95
+ errors: list[ValidationIssue]
96
+ warnings: list[ValidationIssue]
97
+
98
+ @classmethod
99
+ def empty(cls) -> _IssueBuilder:
100
+ return cls(errors=[], warnings=[])
101
+
102
+ def error(self, field: str, code: str, message: str, value: str | None = None) -> None:
103
+ self.errors.append(ValidationIssue("error", field, code, message, value))
104
+
105
+ def warning(self, field: str, code: str, message: str, value: str | None = None) -> None:
106
+ self.warnings.append(ValidationIssue("warning", field, code, message, value))
107
+
108
+ def report(self) -> ValidationReport:
109
+ return ValidationReport(tuple(self.errors), tuple(self.warnings))
110
+
111
+
112
@cache
def _load_validation_data() -> _ValidationData:
    """Read ``validation_data.json`` from package resources and normalize it.

    Every entry is converted to a lowercase string. Verbs whose present and
    past forms are identical are also added to the irregular-past set. The
    parsed tables are cached after the first call.
    """

    resource = resources.files("lgit.resources") / "validation_data.json"
    payload = json.loads(resource.read_text(encoding="utf-8"))

    def lowered(key: str) -> list[str]:
        return [str(entry).lower() for entry in payload[key]]

    verb_pairs = [(str(present).lower(), str(past).lower()) for present, past in payload["past_tense"]]
    same_form = {past for present, past in verb_pairs if present == past}
    return _ValidationData(
        past_tense=dict(verb_pairs),
        irregular_past=frozenset(same_form.union(lowered("irregular_past"))),
        ed_blocklist=frozenset(lowered("ed_blocklist")),
        d_blocklist=frozenset(lowered("d_blocklist")),
        code_extensions=frozenset(lowered("code_extensions")),
        doc_extensions=frozenset(lowered("doc_extensions")),
        filler_words=tuple(lowered("filler_words")),
        meta_phrases=tuple(lowered("meta_phrases")),
        body_present_tense=frozenset(lowered("body_present_tense")),
    )
131
+
132
+
133
@cache
def _valid_types_ordered() -> tuple[str, ...]:
    """Return the accepted commit types in their configured order.

    The names come from the ``types`` list of the packaged
    ``commit_types.json``; each is stripped and lowercased, and entries with
    a missing or blank name are skipped. ``_FALLBACK_TYPES_ORDERED`` is
    returned when the resource is missing, is not valid JSON, has an
    unexpected shape, or yields no names. The result is cached.
    """

    try:
        raw = (resources.files("lgit.resources") / "commit_types.json").read_text(encoding="utf-8")
        data = json.loads(raw)
        # Parse inside the ``try`` so a malformed document (top-level list,
        # non-dict entries, ...) falls back instead of raising.
        names = (str(item["name"]).strip().lower() for item in data.get("types", ()) if item.get("name"))
        # A whitespace-only name would otherwise become the empty type "".
        types = tuple(name for name in names if name)
    # Parenthesized tuple: the bare comma form only parses on Python 3.14+.
    except (FileNotFoundError, json.JSONDecodeError, KeyError, TypeError, AttributeError):
        return _FALLBACK_TYPES_ORDERED
    return types or _FALLBACK_TYPES_ORDERED
142
+
143
+
144
@cache
def _valid_types() -> frozenset[str]:
    """Return the accepted commit types as a set for membership tests."""

    known = frozenset(_valid_types_ordered())
    return known if known else _FALLBACK_TYPES
147
+
148
+
149
+ def _byte_len(value: str) -> int:
150
+ return len(value.encode("utf-8"))
151
+
152
+
153
def present_to_past(present: str) -> str | None:
    """Look up the configured past-tense form of a present-tense verb.

    The lookup is case-insensitive. ``None`` is returned when the verb is not
    in the table.
    """

    table = _load_validation_data().past_tense
    key = present.lower()
    return table.get(key)
157
+
158
+
159
def split_verb_token(token: str) -> tuple[str, str] | None:
    """Separate the leading run of ASCII letters from the rest of a token.

    Returns ``(letters, remainder)``, or ``None`` when the token does not
    start with an ASCII letter.
    """

    prefix_length = len(token)
    for position, character in enumerate(token):
        if not (character.isascii() and character.isalpha()):
            prefix_length = position
            break
    if prefix_length == 0:
        return None
    return token[:prefix_length], token[prefix_length:]
170
+
171
+
172
def verb_stem(token: str) -> str | None:
    """Return the lowercase leading ASCII-letter run of a token.

    ``None`` is returned when the token has no leading ASCII letters or when
    that leading run is entirely uppercase (treated as an acronym).
    """

    parts = split_verb_token(token)
    if parts is None:
        return None
    leading = parts[0]
    return None if leading.isupper() else leading.lower()
182
+
183
+
184
def is_past_tense_verb(word: str) -> bool:
    """Decide heuristically whether a bare word is a past-tense verb.

    The checks run in order: a configured past form that differs from its
    present form; an ``-ed`` ending not on the ed-blocklist; a vowel + ``d``
    ending (four or more characters) not on the d-blocklist; membership in
    the irregular-past set.
    """

    candidate = word.lower()
    tables = _load_validation_data()
    for present, past in tables.past_tense.items():
        if past == candidate and present != past:
            return True
    if candidate.endswith("ed"):
        return candidate not in tables.ed_blocklist
    vowel_then_d = len(candidate) >= 4 and candidate[-1] == "d" and candidate[-2] in "aeiou"
    if vowel_then_d:
        return candidate not in tables.d_blocklist
    return candidate in tables.irregular_past
196
+
197
+
198
def is_past_tense_first_word(token: str) -> bool:
    """Decide whether the raw first token of a summary counts as past tense.

    The token is accepted when the whole token, or its leading verb stem, is
    a past-tense verb. A ``re-<verb>`` token is also accepted when the inner
    verb is past tense or has a configured past-tense form.
    """

    if not token:
        return False
    if is_past_tense_verb(token.lower()):
        return True
    stem = verb_stem(token)
    if stem is not None and is_past_tense_verb(stem):
        return True

    # Remaining case: a hyphenated "re-" prefix such as "re-added".
    parts = split_verb_token(token)
    if parts is None:
        return False
    prefix, remainder = parts
    if not (prefix.lower() == "re" and remainder.startswith("-")):
        return False
    hyphenated = split_verb_token(remainder[1:])
    if hyphenated is None:
        return False
    inner = hyphenated[0].lower()
    return is_past_tense_verb(inner) or present_to_past(inner) is not None
224
+
225
+
226
def validate_commit_message(
    msg: Any,
    config: Any | None = None,
    *,
    stat: str = "",
    project_names: Iterable[str] = (),
) -> ValidationReport:
    """Run every commit-message check and collect the diagnostics.

    ``msg`` is read through attributes (``commit_type``/``type``, ``scope``,
    ``summary``, ``body``). ``config`` may supply summary length limits.
    ``stat`` enables the file-based heuristics when non-empty, and
    ``project_names`` lists names that must not be used as a scope.
    """

    issues = _IssueBuilder.empty()
    kind = _commit_type_text(msg)
    scope = _scope_text(msg)
    summary = _summary_text(msg)
    body_items = tuple(_iter_strings(getattr(msg, "body", ())))

    _validate_type(kind, issues)
    _validate_scope(scope, project_names, issues)
    _validate_summary(summary, kind, scope, config, issues)
    has_summary = bool(summary.strip())
    if has_summary:
        _validate_summary_content(summary, kind, stat, issues)
    _validate_body(body_items, issues)
    if stat:
        _type_scope_consistency(kind, stat, body_items, issues)
    return issues.report()
250
+
251
+
252
def validate_summary_quality(summary: str, commit_type: str, stat: str = "") -> ValidationReport:
    """Check a generated summary on its own, before a commit object exists.

    A blank summary yields a single ``empty_summary`` error. Otherwise the
    stripped summary goes through the summary content checks.
    """

    issues = _IssueBuilder.empty()
    text = str(summary).strip()
    if text:
        _validate_summary_content(text, str(commit_type), stat, issues)
    else:
        issues.error("summary", "empty_summary", "summary is empty")
    return issues.report()
262
+
263
+
264
def check_type_scope_consistency(msg: Any, stat: str) -> ValidationReport:
    """Run only the commit-type versus changed-files heuristics."""

    issues = _IssueBuilder.empty()
    kind = _commit_type_text(msg)
    body_items = tuple(_iter_strings(getattr(msg, "body", ())))
    _type_scope_consistency(kind, stat, body_items, issues)
    return issues.report()
270
+
271
+
272
def _validate_type(commit_type: str, builder: _IssueBuilder) -> None:
    """Record an error when *commit_type* is not an accepted type."""

    if commit_type in _valid_types():
        return
    choices = ", ".join(_valid_types_ordered())
    builder.error(
        "type",
        "invalid_type",
        f"Invalid commit type: {commit_type!r}. Must be one of: {choices}",
        commit_type,
    )
281
+
282
+
283
def _validate_scope(scope: str | None, project_names: Iterable[str], builder: _IssueBuilder) -> None:
    """Reject an empty scope and a scope that merely repeats a project name.

    A missing scope (``None``) is valid. Names are compared after
    normalization.
    """

    if scope is None:
        return
    if not scope:
        builder.error("scope", "empty_scope", "Scope cannot be empty string; omit it instead", scope)
        return

    # A lone string would otherwise be iterated character by character.
    candidates = [project_names] if isinstance(project_names, str) else project_names
    normalized_projects = set()
    for candidate in candidates:
        if candidate:
            normalized_projects.add(_normalize_name(candidate))
    if _normalize_name(scope) not in normalized_projects:
        return
    builder.error(
        "scope",
        "project_name_scope",
        f"Scope {scope!r} is the project name; omit scope for project-wide changes",
        scope,
    )
298
+
299
+
300
def _validate_summary(
    summary: str,
    commit_type: str,
    scope: str | None,
    config: Any | None,
    builder: _IssueBuilder,
) -> None:
    """Check the summary for emptiness, a trailing period, and line length.

    The length is measured in UTF-8 bytes. Limits are read from the
    ``summary_guideline``, ``summary_soft_limit`` and ``summary_hard_limit``
    attributes of *config*, with the module defaults as fallback. Only the
    most severe exceeded limit is reported.
    """

    if not summary.strip():
        builder.error("summary", "empty_summary", "Summary cannot be empty", summary)
        return
    if summary.rstrip().endswith("."):
        builder.error(
            "summary",
            "trailing_period",
            "Summary must NOT end with a period (conventional commits style)",
            summary,
        )

    # NOTE(review): the two "+ 2" terms presumably account for "(" ")" around
    # the scope and the ": " separator of a "type(scope): summary" line.
    scope_bytes = _byte_len(scope) + 2 if scope else 0
    first_line_len = _byte_len(commit_type) + scope_bytes + 2 + _byte_len(summary)

    guideline = int(getattr(config, "summary_guideline", _DEFAULT_GUIDELINE))
    soft_limit = int(getattr(config, "summary_soft_limit", _DEFAULT_SOFT_LIMIT))
    hard_limit = int(getattr(config, "summary_hard_limit", _DEFAULT_HARD_LIMIT))

    # Checked from most to least severe; the first exceeded limit wins.
    tiers = (
        (hard_limit, builder.error, "summary_too_long", "hard limit"),
        (soft_limit, builder.warning, "summary_soft_limit", "soft limit"),
        (guideline, builder.warning, "summary_guideline", "guideline"),
    )
    for limit, emit, code, label in tiers:
        if first_line_len > limit:
            emit(
                "summary",
                code,
                f"Summary line exceeds {label}: {first_line_len} > {limit} chars",
                str(first_line_len),
            )
            break
343
+
344
+
345
def _validate_summary_content(summary: str, commit_type: str, stat: str, builder: _IssueBuilder) -> None:
    """Check the wording of a summary.

    Errors: no words at all, a first word that is not past tense, a first
    word that repeats the commit type. Warnings: configured filler words and
    meta-phrases found anywhere in the lowercased summary. When *stat* is
    non-empty the file-extension heuristics run as well.
    """

    words = summary.split(maxsplit=1)
    if not words:
        builder.error("summary", "summary_missing_word", "Summary must contain at least one word")
        return
    first_word = words[0]

    if not is_past_tense_first_word(first_word):
        builder.error(
            "summary",
            "present_tense_first_word",
            f"Summary must start with a past-tense verb (ending in -ed/-d or irregular). Got {first_word!r}",
            first_word,
        )
    if first_word.lower() == commit_type:
        builder.error(
            "summary",
            "type_word_repetition",
            f"Summary repeats commit type {commit_type!r}: first word is {first_word!r}",
            first_word,
        )

    haystack = summary.lower()
    tables = _load_validation_data()
    for filler in (word for word in tables.filler_words if word in haystack):
        builder.warning(
            "summary",
            "filler_word",
            f"Summary contains filler word {filler!r}",
            filler,
        )
    for phrase in (text for text in tables.meta_phrases if text in haystack):
        builder.warning(
            "summary",
            "meta_phrase",
            f"Summary contains meta-phrase {phrase!r}; describe what changed",
            phrase,
        )

    if stat:
        _summary_file_mismatch(summary, commit_type, stat, builder)
386
+
387
+
388
def _validate_body(body: Iterable[str], builder: _IssueBuilder) -> None:
    """Warn about body items that start in present tense or lack a period.

    The recorded ``value`` of each warning is the item's index as a string.
    """

    present_words = _load_validation_data().body_present_tense
    for index, item in enumerate(body):
        text = item.strip()
        tokens = text.split(maxsplit=1)
        leading = tokens[0].lower() if tokens else ""
        if leading in present_words:
            builder.warning(
                "body",
                "present_tense_body_item",
                f"Body item uses present tense: {text!r}",
                str(index),
            )
        if text and not text.endswith("."):
            builder.warning(
                "body",
                "missing_period_body_item",
                f"Body item is missing a period: {text!r}",
                str(index),
            )
407
+
408
+
409
def _summary_file_mismatch(summary: str, commit_type: str, stat: str, builder: _IssueBuilder) -> None:
    """Warn when the commit type disagrees with the changed file extensions.

    Only paths that have an extension are counted. A warning is raised when
    more than 80% of them are ``.md`` but the type is not ``docs``, and when
    none of them is a code extension but the type is ``feat`` or ``fix``.
    """

    del summary  # part of the signature, but the checks only look at the stat
    extensions = []
    for path in _stat_paths(stat):
        extension = PurePosixPath(path).suffix.lstrip(".").lower()
        if extension:
            extensions.append(extension)
    if not extensions:
        return

    markdown_percent = extensions.count("md") * 100 // len(extensions)
    if markdown_percent > 80 and commit_type != "docs":
        builder.warning(
            "type",
            "markdown_type_mismatch",
            f"Type mismatch: {markdown_percent}% .md files but type is {commit_type!r}; consider docs",
            commit_type,
        )

    code_extensions = _load_validation_data().code_extensions
    touches_code = any(extension in code_extensions for extension in extensions)
    if not touches_code and commit_type in ("feat", "fix"):
        builder.warning(
            "type",
            "no_code_type_mismatch",
            f"Type mismatch: no code files changed but type is {commit_type!r}",
            commit_type,
        )
433
+
434
+
435
def _type_scope_consistency(commit_type: str, stat: str, body: tuple[str, ...], builder: _IssueBuilder) -> None:
    """Warn when the commit type looks inconsistent with the changed files.

    Each recognized type has one heuristic:

    * ``docs``: some path has a documentation extension, sits under a
      ``docs`` directory, or mentions ``readme``.
    * ``test``: some path looks like a test file or test directory.
    * ``style``: no path has a code extension.
    * ``ci``: some path is a known CI configuration.
    * ``build``: some path is a known build file.
    * ``refactor``: the stat reports no newly created files.
    * ``perf``: some path mentions bench/perf/profile, or the body contains
      an optimization keyword.

    Other types are not checked. All findings are warnings.
    """

    paths = tuple(_stat_paths(stat))
    # Anchor every path with a leading slash so that root-level entries such
    # as "docs/logo.png", "tests/conftest.py" or "test_api.py" match the same
    # "/docs/" and "/test" patterns as nested ones.
    lower_paths = tuple(f"/{path.lower()}" for path in paths)
    data = _load_validation_data()

    def has_extension_in(extensions: frozenset[str]) -> bool:
        # True when any changed path carries one of the given extensions.
        return any(PurePosixPath(path).suffix.lstrip(".").lower() in extensions for path in paths)

    if commit_type == "docs":
        has_docs = has_extension_in(data.doc_extensions) or any(
            "/docs/" in path or "readme" in path for path in lower_paths
        )
        if not has_docs:
            builder.warning("type", "docs_without_docs", "Commit type 'docs' but no documentation files changed")
    elif commit_type == "test":
        has_test = any("/test" in path or "_test." in path or ".test." in path for path in lower_paths)
        if not has_test:
            builder.warning("type", "test_without_tests", "Commit type 'test' but no test files changed")
    elif commit_type == "style":
        if has_extension_in(data.code_extensions):
            builder.warning("type", "style_with_code", "Commit type 'style' but code files changed")
    elif commit_type == "ci":
        has_ci = any(
            ".github/workflows" in path or ".gitlab-ci" in path or "jenkinsfile" in path for path in lower_paths
        )
        if not has_ci:
            builder.warning("type", "ci_without_ci", "Commit type 'ci' but no CI configuration files changed")
    elif commit_type == "build":
        has_build = any(
            "cargo.toml" in path or "package.json" in path or "makefile" in path or "build." in path
            for path in lower_paths
        )
        if not has_build:
            builder.warning("type", "build_without_build", "Commit type 'build' but no build files changed")
    elif commit_type == "refactor":
        # New files are detected from the raw stat text, not the parsed paths.
        has_new_files = any(
            line.strip().startswith("create mode") or "new file" in line.lower() for line in stat.splitlines()
        )
        if has_new_files:
            builder.warning(
                "type",
                "refactor_with_new_files",
                "Commit type 'refactor' but new files were created; verify no new capabilities were added",
            )
    elif commit_type == "perf":
        has_perf_files = any("bench" in path or "perf" in path or "profile" in path for path in lower_paths)
        details_text = " ".join(body).lower()
        has_perf_details = any(term in details_text for term in ("faster", "optimization", "performance", "optimized"))
        if not has_perf_files and not has_perf_details:
            builder.warning(
                "type",
                "perf_without_evidence",
                "Commit type 'perf' but no performance files or optimization keywords were found",
            )
489
+
490
+
491
+ def _stat_paths(stat: str) -> list[str]:
492
+ paths: list[str] = []
493
+ for line in stat.splitlines():
494
+ stripped = line.strip()
495
+ if not stripped:
496
+ continue
497
+ if stripped.startswith("create mode"):
498
+ parts = stripped.split(maxsplit=3)
499
+ if len(parts) == 4:
500
+ paths.append(parts[3])
501
+ continue
502
+ path = stripped.split("|", maxsplit=1)[0].strip()
503
+ if path and not path[0].isdigit():
504
+ paths.append(path)
505
+ return paths
506
+
507
+
508
+ def _commit_type_text(msg: Any) -> str:
509
+ return str(getattr(msg, "commit_type", getattr(msg, "type", ""))).strip().lower()
510
+
511
+
512
+ def _scope_text(msg: Any) -> str | None:
513
+ scope = getattr(msg, "scope", None)
514
+ if scope is None:
515
+ return None
516
+ return str(scope).strip().lower()
517
+
518
+
519
+ def _summary_text(msg: Any) -> str:
520
+ summary = getattr(msg, "summary", "")
521
+ value = getattr(summary, "value", summary)
522
+ return str(value)
523
+
524
+
525
+ def _iter_strings(values: Iterable[Any]) -> Iterable[str]:
526
+ for value in values:
527
+ yield str(value)
528
+
529
+
530
+ def _normalize_name(name: str) -> str:
531
+ return name.lower().replace("-", "").replace("_", "")
532
+
533
+
534
# Public API of the module: report types first, then functions.
__all__ = [
    "ValidationIssue", "ValidationReport",
    "check_type_scope_consistency",
    "is_past_tense_first_word", "is_past_tense_verb",
    "present_to_past",
    "split_verb_token",
    "validate_commit_message", "validate_summary_quality",
    "verb_stem",
]