lgit-cli 3.7.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (54) hide show
  1. lgit/__init__.py +75 -0
  2. lgit/__main__.py +8 -0
  3. lgit/analysis.py +326 -0
  4. lgit/api.py +1077 -0
  5. lgit/cache.py +338 -0
  6. lgit/changelog.py +523 -0
  7. lgit/cli.py +1104 -0
  8. lgit/compose.py +2110 -0
  9. lgit/config.py +437 -0
  10. lgit/diffing.py +384 -0
  11. lgit/errors.py +137 -0
  12. lgit/git.py +852 -0
  13. lgit/map_reduce.py +508 -0
  14. lgit/markdown_output.py +709 -0
  15. lgit/models.py +924 -0
  16. lgit/normalization.py +411 -0
  17. lgit/patch.py +784 -0
  18. lgit/profile.py +426 -0
  19. lgit/py.typed +0 -0
  20. lgit/repo.py +287 -0
  21. lgit/resources/__init__.py +1 -0
  22. lgit/resources/commit_types.json +242 -0
  23. lgit/resources/prompts/analysis/default.md +237 -0
  24. lgit/resources/prompts/analysis/markdown.md +112 -0
  25. lgit/resources/prompts/changelog/default.md +89 -0
  26. lgit/resources/prompts/changelog/markdown.md +60 -0
  27. lgit/resources/prompts/compose-bind/default.md +40 -0
  28. lgit/resources/prompts/compose-bind/markdown.md +41 -0
  29. lgit/resources/prompts/compose-intent/default.md +63 -0
  30. lgit/resources/prompts/compose-intent/markdown.md +59 -0
  31. lgit/resources/prompts/fast/default.md +46 -0
  32. lgit/resources/prompts/fast/markdown.md +51 -0
  33. lgit/resources/prompts/map/default.md +67 -0
  34. lgit/resources/prompts/map/markdown.md +63 -0
  35. lgit/resources/prompts/reduce/default.md +81 -0
  36. lgit/resources/prompts/reduce/markdown.md +68 -0
  37. lgit/resources/prompts/summary/default.md +74 -0
  38. lgit/resources/prompts/summary/markdown.md +77 -0
  39. lgit/resources/validation_data.json +1 -0
  40. lgit/rewrite.py +392 -0
  41. lgit/style.py +295 -0
  42. lgit/templates.py +385 -0
  43. lgit/testing/__init__.py +62 -0
  44. lgit/testing/compare.py +57 -0
  45. lgit/testing/fixture.py +386 -0
  46. lgit/testing/report.py +201 -0
  47. lgit/testing/runner.py +256 -0
  48. lgit/tokens.py +90 -0
  49. lgit/validation.py +545 -0
  50. lgit_cli-3.7.0.dist-info/METADATA +288 -0
  51. lgit_cli-3.7.0.dist-info/RECORD +54 -0
  52. lgit_cli-3.7.0.dist-info/WHEEL +4 -0
  53. lgit_cli-3.7.0.dist-info/entry_points.txt +2 -0
  54. lgit_cli-3.7.0.dist-info/licenses/LICENSE +21 -0
lgit/diffing.py ADDED
@@ -0,0 +1,384 @@
1
+ """Unified diff parsing, truncation, and whitespace classification."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from dataclasses import dataclass, field
6
+ from typing import Protocol
7
+
8
+
9
class _TokenCounter(Protocol):
    """Structural type for objects exposing a synchronous token count."""

    def count_sync(self, text: str) -> int:
        """Return the number of tokens counted in ``text``."""
        ...
11
+
12
+
13
# Extensions (written without the leading dot) used as the fallback value when
# a config object has no ``low_priority_extensions`` attribute.
_DEFAULT_LOW_PRIORITY_EXTENSIONS = set(
    "lock log md txt json yaml yml toml sum tmp bak".split()
)
26
+
27
+
28
+ @dataclass(slots=True)
29
+ class FileDiff:
30
+ """A single file section from a unified git diff."""
31
+
32
+ filename: str
33
+ header: str
34
+ content: str = ""
35
+ additions: int = 0
36
+ deletions: int = 0
37
+ is_binary: bool = False
38
+
39
+ @property
40
+ def size(self) -> int:
41
+ """Return the UTF-8 byte size used for budgeting."""
42
+
43
+ return _byte_len(self.header) + _byte_len(self.content)
44
+
45
+ def token_estimate(self, counter: _TokenCounter | None = None) -> int:
46
+ """Estimate token count using a provided counter or a 4-char heuristic."""
47
+
48
+ if counter is None:
49
+ return max(1, (len(self.header) + len(self.content)) // 4)
50
+ count = getattr(counter, "count_sync", None)
51
+ if callable(count):
52
+ return int(count(self.header)) + int(count(self.content))
53
+ if callable(counter):
54
+ return int(counter(self.header)) + int(counter(self.content))
55
+ return max(1, (len(self.header) + len(self.content)) // 4)
56
+
57
+ def priority(self, config: object | None = None) -> int:
58
+ """Rank this file for context retention; higher values are kept first."""
59
+
60
+ if self.is_binary:
61
+ return -100
62
+
63
+ filename_lower = self.filename.lower()
64
+ if filename_lower.endswith(("cargo.toml", "package.json", "go.mod", "requirements.txt", "pyproject.toml")):
65
+ return 70
66
+ if "prompt" in filename_lower or "system" in filename_lower:
67
+ return 100
68
+ if (
69
+ "/test" in self.filename
70
+ or "test_" in self.filename
71
+ or "_test." in self.filename
72
+ or ".test." in self.filename
73
+ ):
74
+ return 10
75
+
76
+ low_priority = getattr(config, "low_priority_extensions", _DEFAULT_LOW_PRIORITY_EXTENSIONS)
77
+ ext = self.filename.rsplit(".", 1)[-1] if "." in self.filename else ""
78
+ if any(str(item).lstrip(".") == ext for item in low_priority):
79
+ return 20
80
+
81
+ match ext:
82
+ case "rs" | "go" | "py" | "js" | "ts" | "tsx" | "jsx" | "java" | "c" | "cpp" | "h" | "hpp":
83
+ return 100
84
+ case "sql" | "sh" | "bash":
85
+ return 80
86
+ case _:
87
+ return 50
88
+
89
+ def truncate(self, max_size: int) -> None:
90
+ """Truncate content in place while preserving headers and useful edges."""
91
+
92
+ if self.size <= max_size:
93
+ return
94
+
95
+ truncation_suffix = "\n... (truncated)"
96
+ available = max_size - _byte_len(self.header) - _byte_len(truncation_suffix)
97
+ if available < 50:
98
+ self.content = "... (truncated)"
99
+ return
100
+
101
+ lines = self.content.splitlines()
102
+ if len(lines) > 30:
103
+ keep_start = 15
104
+ keep_end = 10
105
+ omitted = len(lines) - keep_start - keep_end
106
+ self.content = "\n".join([*lines[:keep_start], f"... (truncated {omitted} lines) ...", *lines[-keep_end:]])
107
+ return
108
+
109
+ self.content = _truncate_utf8(self.content, available) + truncation_suffix
110
+
111
+
112
def _byte_len(text: str) -> int:
    """Return how many bytes ``text`` occupies once encoded as UTF-8."""
    encoded = text.encode("utf-8")
    return len(encoded)
114
+
115
+
116
def _truncate_utf8(text: str, max_bytes: int) -> str:
    """Return ``text`` limited to ``max_bytes`` bytes of its UTF-8 encoding.

    Text that already fits is returned unchanged.
    """
    encoded = text.encode("utf-8")
    if len(encoded) > max_bytes:
        # Ignoring decode errors discards a multi-byte character that the
        # byte slice cut in half.
        return encoded[:max_bytes].decode("utf-8", errors="ignore")
    return text
121
+
122
+
123
+ @dataclass(slots=True)
124
+ class WhitespaceReport:
125
+ """Classification of a diff by whitespace-only and substantive files."""
126
+
127
+ whitespace_only_files: list[str] = field(default_factory=list)
128
+ has_substantive: bool = False
129
+
130
+ @property
131
+ def all_whitespace(self) -> bool:
132
+ """Return true when every changed file only changes whitespace."""
133
+
134
+ return bool(self.whitespace_only_files) and not self.has_substantive
135
+
136
+ @property
137
+ def is_whitespace_only(self) -> bool:
138
+ """Return true when every changed file only changes whitespace."""
139
+
140
+ return self.all_whitespace
141
+
142
+
143
def parse_diff(diff: str) -> list[FileDiff]:
    """Parse a unified git diff into file-level sections.

    A section starts at each ``diff --git`` line; anything before the first
    one is ignored.  Lines up to the first hunk header and every ``@@`` hunk
    header are appended to the section's ``header``; all other lines after
    the first hunk header are appended to ``content``.

    Args:
        diff: Unified diff text.

    Returns:
        One ``FileDiff`` per ``diff --git`` section, in input order.
    """

    file_diffs: list[FileDiff] = []
    current: FileDiff | None = None
    in_diff_header = False

    for line in diff.splitlines():
        if line.startswith("diff --git"):
            if current is not None:
                file_diffs.append(current)
            # Prefer the text after the last " b/" so paths that contain
            # spaces survive; fall back to the fourth whitespace token for
            # prefix-less or quoted forms.
            _, separator, new_path = line.rpartition(" b/")
            parts = line.split()
            if separator and new_path:
                filename = new_path
            elif len(parts) > 3:
                filename = parts[3].removeprefix("b/")
            else:
                filename = "unknown"
            current = FileDiff(filename=filename, header=line)
            in_diff_header = True
            continue

        if current is None:
            continue

        if line.startswith("@@"):
            in_diff_header = False
            current.header += "\n" + line
        elif in_diff_header:
            # Everything before the first hunk is metadata (index, mode,
            # rename, ---/+++ file markers, binary notice).
            if line.startswith("Binary files"):
                current.is_binary = True
            current.header += "\n" + line
        else:
            # Inside a hunk every line is body text.  A removed line such as
            # "-- comment" renders as "--- comment" and must be counted here
            # rather than mistaken for a file marker.
            if current.content:
                current.content += "\n"
            current.content += line
            if line.startswith("+"):
                current.additions += 1
            elif line.startswith("-"):
                current.deletions += 1

    if current is not None:
        file_diffs.append(current)
    return file_diffs
199
+
200
+
201
def reconstruct_diff(files: list[FileDiff] | tuple[FileDiff, ...]) -> str:
    """Join parsed file sections back into a single diff string.

    Each section contributes its header, followed by its content on a new
    line when the content is non-empty.
    """

    rendered = [
        f"{entry.header}\n{entry.content}" if entry.content else entry.header
        for entry in files
    ]
    return "\n".join(rendered)
211
+
212
+
213
def smart_truncate_diff(
    diff: str,
    max_length: int,
    config: object | None = None,
    counter: _TokenCounter | None = None,
) -> str:
    """Shrink a diff to a byte budget, favouring higher-priority files.

    Args:
        diff: Unified diff text.
        max_length: Byte budget used unless the token estimate exceeds the
            configured ``max_diff_tokens``.
        config: Optional object read for ``excluded_files``,
            ``low_priority_extensions`` and ``max_diff_tokens``.
        counter: Optional token counter forwarded to ``token_estimate``.

    Returns:
        The reconstructed (possibly truncated) diff, or a fixed message when
        no file remains after exclusion or none could be included.
    """

    candidates = sorted(
        (entry for entry in parse_diff(diff) if not _is_excluded(entry.filename, config)),
        key=lambda entry: entry.priority(config),
        reverse=True,
    )
    if not candidates:
        return "No relevant files to analyze (only lock files or excluded files were changed)"

    byte_total = sum(entry.size for entry in candidates)
    token_total = sum(entry.token_estimate(counter) for entry in candidates)
    token_cap = int(getattr(config, "max_diff_tokens", 16_000))
    # When the token estimate is over the cap, the byte budget is derived
    # from the cap (four bytes per token) instead of max_length.
    if token_total > token_cap:
        budget = token_cap * 4
    else:
        budget = max_length

    if byte_total <= budget:
        return reconstruct_diff(candidates)

    headers_cost = sum(_byte_len(entry.header) + 20 for entry in candidates)
    kept: list[FileDiff] = []

    if headers_cost <= budget:
        # Every header fits: keep all files and split the leftover bytes
        # evenly between them.
        share = max(0, budget - headers_cost) // len(candidates)
        for entry in candidates:
            if entry.is_binary:
                kept.append(
                    FileDiff(entry.filename, entry.header, "", entry.additions, entry.deletions, True)
                )
                continue
            allowance = _byte_len(entry.header) + share
            if entry.size > allowance:
                entry.truncate(allowance)
            kept.append(entry)
    else:
        # Headers alone overflow: take whole files in priority order, then at
        # most one truncated file, and stop.
        used = 0
        for entry in candidates:
            if entry.is_binary:
                continue
            if used + entry.size <= budget:
                used += entry.size
                kept.append(entry)
            elif used < budget // 2 and entry.priority(config) >= 50:
                entry.truncate(max(0, budget - used - 100))
                kept.append(entry)
                break

    if not kept:
        return "Error: Could not include any files in the diff"

    text = reconstruct_diff(kept)
    dropped = len(candidates) - len(kept)
    if dropped > 0:
        text += f"\n\n... ({dropped} files omitted) ..."
    return text
270
+
271
+
272
def truncate_diff_by_lines(diff: str, max_lines: int, config: object | None = None) -> str:
    """Cut a diff down to roughly ``max_lines``, sharing lines by priority.

    The input is returned untouched when it already fits.  Otherwise each
    file keeps all header lines plus a content allowance proportional to its
    priority (never fewer than five lines), split between the start and end
    of the content around an omission marker.
    """

    parsed = parse_diff(diff)
    line_total = sum(
        len(entry.header.splitlines()) + len(entry.content.splitlines()) for entry in parsed
    )
    if line_total <= max_lines:
        return diff

    weights = [max(1, entry.priority(config)) for entry in parsed]
    weight_total = sum(weights) or 1

    output: list[str] = []
    for entry, weight in zip(parsed, weights):
        output.extend(entry.header.splitlines())
        body = entry.content.splitlines()
        allowance = max(5, int(max_lines * weight / weight_total))

        if len(body) > allowance:
            head = allowance // 2
            tail = allowance - head
            skipped = len(body) - head - tail
            output.extend(body[:head])
            output.append(f"[... {skipped} lines omitted ...]")
            output.extend(body[-tail:])
            continue

        output.extend(body)
        if not body:
            # A file without content contributes one empty line.
            output.append("")

    joined = "\n".join(output)
    return joined + "\n" if output else joined
299
+
300
+
301
def classify_diff_whitespace(diff: str) -> WhitespaceReport:
    """Sort each file section of a diff into whitespace-only or substantive.

    Returns:
        A report listing the whitespace-only paths and flagging whether any
        section had a substantive change.
    """

    report = WhitespaceReport()
    for path, section in _file_sections(diff)[1]:
        if not _section_is_whitespace_only(section):
            report.has_substantive = True
            continue
        report.whitespace_only_files.append(path)
    return report
312
+
313
+
314
def strip_whitespace_only_files(diff: str) -> str | None:
    """Drop whitespace-only file sections from a diff.

    Returns:
        The preamble plus the remaining sections, or ``None`` when the diff
        has no sections, nothing was dropped, or every section was dropped.
    """

    preamble, sections = _file_sections(diff)
    if not sections:
        return None

    survivors = [text for _, text in sections if not _section_is_whitespace_only(text)]
    nothing_removed = len(survivors) == len(sections)
    if nothing_removed or not survivors:
        return None
    return preamble + "".join(survivors)
332
+
333
+
334
def _is_excluded(filename: str, config: object | None) -> bool:
    """Return True when ``filename`` ends with any ``config.excluded_files`` entry.

    A config without an ``excluded_files`` attribute excludes nothing.
    """
    patterns = getattr(config, "excluded_files", ())
    for pattern in patterns:
        if filename.endswith(str(pattern)):
            return True
    return False
337
+
338
+
339
def _file_section_starts(diff: str) -> list[int]:
    """Return the offset of every ``diff --git`` that begins a line.

    A line begins at offset 0 or immediately after a ``"\\n"`` character.
    """
    starts: list[int] = []
    offset = 0
    # Splitting on "\n" only (not splitlines) keeps the line-start rule
    # limited to the newline character.
    for segment in diff.split("\n"):
        if segment.startswith("diff --git"):
            starts.append(offset)
        offset += len(segment) + 1
    return starts
349
+
350
+
351
def _file_sections(diff: str) -> tuple[str, list[tuple[str, str]]]:
    """Split a diff into its preamble and per-file ``(path, text)`` sections.

    Args:
        diff: Unified diff text.

    Returns:
        A tuple of the text before the first ``diff --git`` line and a list
        of ``(path, section_text)`` pairs.  When the diff has no section the
        whole input is returned as the preamble with an empty list.
    """
    starts = _file_section_starts(diff)
    if not starts:
        return diff, []

    preamble = diff[: starts[0]]
    sections: list[tuple[str, str]] = []
    # Pair each start with the next one; the last section runs to the end.
    for start, end in zip(starts, [*starts[1:], len(diff)]):
        section = diff[start:end]
        first_line = section.splitlines()[0] if section else ""
        # Prefer the text after the last " b/" so paths that contain spaces
        # survive; fall back to the fourth whitespace token for prefix-less
        # or quoted forms.
        _, separator, new_path = first_line.rpartition(" b/")
        parts = first_line.split()
        if separator and new_path:
            path = new_path
        elif len(parts) > 3:
            path = parts[3].removeprefix("b/")
        else:
            path = "unknown"
        sections.append((path, section))
    return preamble, sections
365
+
366
+
367
def _section_is_whitespace_only(section: str) -> bool:
    """Return True when a file section changes nothing but whitespace.

    The non-whitespace characters of all added lines are compared, in order,
    with those of all removed lines.  Binary, rename and copy sections are
    never whitespace-only, and neither is a section without added or removed
    lines.

    Args:
        section: Text of one ``diff --git`` section.
    """
    added: list[str] = []
    removed: list[str] = []
    has_change = False
    in_hunk = False

    for line in section.splitlines():
        if line.startswith("@@"):
            in_hunk = True
            continue
        if line.startswith(("Binary files", "rename from", "rename to", "copy from", "copy to")):
            return False
        # "---"/"+++" are file markers only before the first hunk.  Inside a
        # hunk they are real changes (e.g. a removed "-- comment" line) and
        # must take part in the comparison.
        if not in_hunk and line.startswith(("+++", "---")):
            continue
        if line.startswith("+"):
            has_change = True
            added.extend(ch for ch in line[1:] if not ch.isspace())
        elif line.startswith("-"):
            has_change = True
            removed.extend(ch for ch in line[1:] if not ch.isspace())

    return has_change and added == removed
lgit/errors.py ADDED
@@ -0,0 +1,137 @@
1
+ """Shared exception hierarchy for lgit."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from dataclasses import dataclass
6
+ from pathlib import Path
7
+ from typing import Any
8
+
9
+
10
class LgitError(Exception):
    """Common ancestor of every failure lgit expects and reports."""
12
+
13
+
14
@dataclass(slots=True)
class GitError(LgitError):
    """Failure of a git subprocess or repository operation.

    Attributes:
        message: Description of what went wrong.
    """

    message: str

    def __str__(self) -> str:
        """Return the message prefixed with ``git: ``."""
        return "git: {}".format(self.message)
22
+
23
+
24
class GitIndexLocked(GitError):
    """Git operations are blocked because the repository index lock exists.

    Attributes:
        lock_path: Location of the lock file.
    """

    lock_path: Path

    def __init__(self, lock_path: Path) -> None:
        """Store ``lock_path`` and set the fixed error message."""
        super().__init__(message="git index is locked")
        self.lock_path = lock_path

    def __str__(self) -> str:
        """Return the message followed by the lock path."""
        return "{}: {}".format(self.message, self.lock_path)
35
+
36
+
37
@dataclass(slots=True)
class ApiError(LgitError):
    """An API request came back with a non-successful response.

    Attributes:
        status: HTTP status code of the response.
        body: Response body text.
    """

    status: int
    body: str

    def __str__(self) -> str:
        """Return a one-line summary with the status and body."""
        return "API request failed (HTTP {}): {}".format(self.status, self.body)
46
+
47
+
48
class ApiContextLengthExceeded(ApiError):
    """The request did not fit in the selected model's context window.

    Attributes:
        operation: Name of the operation that issued the request.
        model: Model the request was sent to.
    """

    operation: str
    model: str

    def __init__(self, *, operation: str, model: str, status: int, body: str) -> None:
        """Record the failing operation and model on top of status and body."""
        super().__init__(status=status, body=body)
        self.operation = operation
        self.model = model

    def __str__(self) -> str:
        """Return a summary naming the operation, model, status and body."""
        template = "API request exceeded the model context window during {} ({}, HTTP {}): {}"
        return template.format(self.operation, self.model, self.status, self.body)
64
+
65
+
66
@dataclass(slots=True)
class ValidationFailure(LgitError):
    """A value was rejected by domain validation.

    Attributes:
        message: Explanation of the rejection.
        field: Name of the offending field, when known.
        value: The rejected value, when recorded.
    """

    message: str
    field: str | None = None
    value: Any | None = None

    def __str__(self) -> str:
        """Return the message, prefixed with the field name when one is set."""
        return self.message if self.field is None else f"{self.field}: {self.message}"
78
+
79
+
80
@dataclass(slots=True)
class NoChanges(LgitError):
    """There were no staged, unstaged, or compose changes to analyze.

    Attributes:
        mode: Name of the mode in which no changes were found.
    """

    mode: str

    def __str__(self) -> str:
        """Return a sentence naming the mode that had no changes."""
        return "No changes found in {} mode".format(self.mode)
88
+
89
+
90
@dataclass(slots=True)
class ConfigError(LgitError):
    """Loading or validating configuration failed.

    Attributes:
        message: Description of the configuration problem.
    """

    message: str

    def __str__(self) -> str:
        """Return the stored message unchanged."""
        text = self.message
        return text
98
+
99
+
100
class InvalidCommitType(ValidationFailure):
    """Raised for a commit type token that is neither canonical nor a known alias."""
102
+
103
+
104
class InvalidScope(ValidationFailure):
    """Raised when a conventional-commit scope is syntactically invalid."""
106
+
107
+
108
@dataclass(slots=True)
class SummaryTooLong(ValidationFailure):
    """A commit summary exceeded the configured hard limit.

    Attributes:
        length: Actual length of the summary in characters.
        max_length: Maximum length that was allowed.
    """

    length: int = 0
    max_length: int = 0

    def __init__(self, length: int, max_length: int) -> None:
        """Build the failure for a summary of ``length`` against ``max_length``.

        The inherited ``field`` is set to ``"summary"`` and ``value`` to the
        measured length.
        """
        # Call the parent initializer explicitly: ``slots=True`` makes the
        # dataclass decorator rebuild the class, and zero-argument ``super()``
        # inside such a class raises TypeError on Python versions before 3.14.
        ValidationFailure.__init__(
            self,
            f"summary too long: {length} chars (max {max_length})",
            field="summary",
            value=length,
        )
        self.length = length
        self.max_length = max_length
123
+
124
+
125
+ __all__ = [
126
+ "LgitError",
127
+ "GitError",
128
+ "GitIndexLocked",
129
+ "ApiError",
130
+ "ApiContextLengthExceeded",
131
+ "ValidationFailure",
132
+ "NoChanges",
133
+ "ConfigError",
134
+ "InvalidCommitType",
135
+ "InvalidScope",
136
+ "SummaryTooLong",
137
+ ]