PyPI - lgit-cli - Versions diffs - 3.7.0__py3-none-any.whl - Mend

lgit-cli 3.7.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (54) hide show

lgit/__init__.py +75 -0
lgit/__main__.py +8 -0
lgit/analysis.py +326 -0
lgit/api.py +1077 -0
lgit/cache.py +338 -0
lgit/changelog.py +523 -0
lgit/cli.py +1104 -0
lgit/compose.py +2110 -0
lgit/config.py +437 -0
lgit/diffing.py +384 -0
lgit/errors.py +137 -0
lgit/git.py +852 -0
lgit/map_reduce.py +508 -0
lgit/markdown_output.py +709 -0
lgit/models.py +924 -0
lgit/normalization.py +411 -0
lgit/patch.py +784 -0
lgit/profile.py +426 -0
lgit/py.typed +0 -0
lgit/repo.py +287 -0
lgit/resources/__init__.py +1 -0
lgit/resources/commit_types.json +242 -0
lgit/resources/prompts/analysis/default.md +237 -0
lgit/resources/prompts/analysis/markdown.md +112 -0
lgit/resources/prompts/changelog/default.md +89 -0
lgit/resources/prompts/changelog/markdown.md +60 -0
lgit/resources/prompts/compose-bind/default.md +40 -0
lgit/resources/prompts/compose-bind/markdown.md +41 -0
lgit/resources/prompts/compose-intent/default.md +63 -0
lgit/resources/prompts/compose-intent/markdown.md +59 -0
lgit/resources/prompts/fast/default.md +46 -0
lgit/resources/prompts/fast/markdown.md +51 -0
lgit/resources/prompts/map/default.md +67 -0
lgit/resources/prompts/map/markdown.md +63 -0
lgit/resources/prompts/reduce/default.md +81 -0
lgit/resources/prompts/reduce/markdown.md +68 -0
lgit/resources/prompts/summary/default.md +74 -0
lgit/resources/prompts/summary/markdown.md +77 -0
lgit/resources/validation_data.json +1 -0
lgit/rewrite.py +392 -0
lgit/style.py +295 -0
lgit/templates.py +385 -0
lgit/testing/__init__.py +62 -0
lgit/testing/compare.py +57 -0
lgit/testing/fixture.py +386 -0
lgit/testing/report.py +201 -0
lgit/testing/runner.py +256 -0
lgit/tokens.py +90 -0
lgit/validation.py +545 -0
lgit_cli-3.7.0.dist-info/METADATA +288 -0
lgit_cli-3.7.0.dist-info/RECORD +54 -0
lgit_cli-3.7.0.dist-info/WHEEL +4 -0
lgit_cli-3.7.0.dist-info/entry_points.txt +2 -0
lgit_cli-3.7.0.dist-info/licenses/LICENSE +21 -0

lgit/diffing.py ADDED Viewed

@@ -0,0 +1,384 @@
+"""Unified diff parsing, truncation, and whitespace classification."""
+from __future__ import annotations
+from dataclasses import dataclass, field
+from typing import Protocol
+class _TokenCounter(Protocol):
+    def count_sync(self, text: str) -> int: ...
+# File extensions (written without the leading dot) that FileDiff.priority()
+# ranks low (20) when the config object has no ``low_priority_extensions``.
+_DEFAULT_LOW_PRIORITY_EXTENSIONS = {
+    "lock",
+    "log",
+    "md",
+    "txt",
+    "json",
+    "yaml",
+    "yml",
+    "toml",
+    "sum",
+    "tmp",
+    "bak",
+}
+@dataclass(slots=True)
+class FileDiff:
+    """A single file section from a unified git diff."""
+    filename: str
+    header: str
+    content: str = ""
+    additions: int = 0
+    deletions: int = 0
+    is_binary: bool = False
+    @property
+    def size(self) -> int:
+        """Return the UTF-8 byte size used for budgeting."""
+        return _byte_len(self.header) + _byte_len(self.content)
+    def token_estimate(self, counter: _TokenCounter | None = None) -> int:
+        """Estimate token count using a provided counter or a 4-char heuristic."""
+        if counter is None:
+            return max(1, (len(self.header) + len(self.content)) // 4)
+        count = getattr(counter, "count_sync", None)
+        if callable(count):
+            return int(count(self.header)) + int(count(self.content))
+        if callable(counter):
+            return int(counter(self.header)) + int(counter(self.content))
+        return max(1, (len(self.header) + len(self.content)) // 4)
+    def priority(self, config: object | None = None) -> int:
+        """Rank this file for context retention; higher values are kept first."""
+        if self.is_binary:
+            return -100
+        filename_lower = self.filename.lower()
+        if filename_lower.endswith(("cargo.toml", "package.json", "go.mod", "requirements.txt", "pyproject.toml")):
+            return 70
+        if "prompt" in filename_lower or "system" in filename_lower:
+            return 100
+        if (
+            "/test" in self.filename
+            or "test_" in self.filename
+            or "_test." in self.filename
+            or ".test." in self.filename
+        ):
+            return 10
+        low_priority = getattr(config, "low_priority_extensions", _DEFAULT_LOW_PRIORITY_EXTENSIONS)
+        ext = self.filename.rsplit(".", 1)[-1] if "." in self.filename else ""
+        if any(str(item).lstrip(".") == ext for item in low_priority):
+            return 20
+        match ext:
+            case "rs" | "go" | "py" | "js" | "ts" | "tsx" | "jsx" | "java" | "c" | "cpp" | "h" | "hpp":
+                return 100
+            case "sql" | "sh" | "bash":
+                return 80
+            case _:
+                return 50
+    def truncate(self, max_size: int) -> None:
+        """Truncate content in place while preserving headers and useful edges."""
+        if self.size <= max_size:
+            return
+        truncation_suffix = "\n... (truncated)"
+        available = max_size - _byte_len(self.header) - _byte_len(truncation_suffix)
+        if available < 50:
+            self.content = "... (truncated)"
+            return
+        lines = self.content.splitlines()
+        if len(lines) > 30:
+            keep_start = 15
+            keep_end = 10
+            omitted = len(lines) - keep_start - keep_end
+            self.content = "\n".join([*lines[:keep_start], f"... (truncated {omitted} lines) ...", *lines[-keep_end:]])
+            return
+        self.content = _truncate_utf8(self.content, available) + truncation_suffix
+def _byte_len(text: str) -> int:
+    return len(text.encode("utf-8"))
+def _truncate_utf8(text: str, max_bytes: int) -> str:
+    data = text.encode("utf-8")
+    if len(data) <= max_bytes:
+        return text
+    return data[:max_bytes].decode("utf-8", errors="ignore")
+@dataclass(slots=True)
+class WhitespaceReport:
+    """Classification of a diff by whitespace-only and substantive files."""
+    whitespace_only_files: list[str] = field(default_factory=list)
+    has_substantive: bool = False
+    @property
+    def all_whitespace(self) -> bool:
+        """Return true when every changed file only changes whitespace."""
+        return bool(self.whitespace_only_files) and not self.has_substantive
+    @property
+    def is_whitespace_only(self) -> bool:
+        """Return true when every changed file only changes whitespace."""
+        return self.all_whitespace
+def parse_diff(diff: str) -> list[FileDiff]:
+    """Parse a unified git diff into file-level sections."""
+    file_diffs: list[FileDiff] = []
+    current: FileDiff | None = None
+    in_diff_header = False
+    for line in diff.splitlines():
+        if line.startswith("diff --git"):
+            if current is not None:
+                file_diffs.append(current)
+            parts = line.split()
+            filename = parts[3].removeprefix("b/") if len(parts) > 3 else "unknown"
+            current = FileDiff(filename=filename, header=line)
+            in_diff_header = True
+            continue
+        if current is None:
+            continue
+        if line.startswith("Binary files"):
+            current.is_binary = True
+            current.header += "\n" + line
+        elif line.startswith(
+            (
+                "index ",
+                "new file",
+                "deleted file",
+                "rename ",
+                "copy ",
+                "similarity index",
+                "dissimilarity index",
+                "old mode",
+                "new mode",
+                "+++",
+                "---",
+            )
+        ):
+            current.header += "\n" + line
+        elif line.startswith("@@"):
+            in_diff_header = False
+            current.header += "\n" + line
+        elif not in_diff_header:
+            if current.content:
+                current.content += "\n"
+            current.content += line
+            if line.startswith("+") and not line.startswith("+++"):
+                current.additions += 1
+            elif line.startswith("-") and not line.startswith("---"):
+                current.deletions += 1
+        else:
+            current.header += "\n" + line
+    if current is not None:
+        file_diffs.append(current)
+    return file_diffs
+def reconstruct_diff(files: list[FileDiff] | tuple[FileDiff, ...]) -> str:
+    """Reconstruct a unified diff from parsed file objects."""
+    sections: list[str] = []
+    for file in files:
+        if file.content:
+            sections.append(f"{file.header}\n{file.content}")
+        else:
+            sections.append(file.header)
+    return "\n".join(sections)
+def smart_truncate_diff(
+    diff: str,
+    max_length: int,
+    config: object | None = None,
+    counter: _TokenCounter | None = None,
+) -> str:
+    """Truncate a diff by file priority while retaining whole-file scope.
+
+    Files whose name ends with an entry of ``config.excluded_files`` are
+    dropped first. The remaining files are sorted by descending priority and
+    returned untouched when their total byte size fits the budget. Otherwise
+    one of two strategies applies: if all headers fit, every file is kept and
+    its content truncated to an equal share of the remaining space; if not,
+    files are added in priority order while they fit.
+
+    Returns the reconstructed diff, possibly followed by an
+    "N files omitted" note, or a plain message string when no file could be
+    included.
+    """
+    file_diffs = [file for file in parse_diff(diff) if not _is_excluded(file.filename, config)]
+    if not file_diffs:
+        return "No relevant files to analyze (only lock files or excluded files were changed)"
+    file_diffs.sort(key=lambda file: file.priority(config), reverse=True)
+    total_size = sum(file.size for file in file_diffs)
+    total_tokens = sum(file.token_estimate(counter) for file in file_diffs)
+    max_diff_tokens = int(getattr(config, "max_diff_tokens", 16_000))
+    # When the token estimate exceeds the token cap, the byte budget becomes
+    # 4 bytes per allowed token and ``max_length`` is not consulted.
+    effective_max = max_diff_tokens * 4 if total_tokens > max_diff_tokens else max_length
+    if total_size <= effective_max:
+        return reconstruct_diff(file_diffs)
+    included: list[FileDiff] = []
+    # Each header is charged 20 extra bytes of slack.
+    header_only_size = sum(_byte_len(file.header) + 20 for file in file_diffs)
+    total_files = len(file_diffs)
+    if header_only_size <= effective_max:
+        # Strategy 1: keep every file, splitting the leftover space evenly.
+        remaining_space = max(0, effective_max - header_only_size)
+        space_per_file = remaining_space // len(file_diffs) if file_diffs else 0
+        for file in file_diffs:
+            if file.is_binary:
+                # Binary files are kept as header-only copies.
+                included.append(FileDiff(file.filename, file.header, "", file.additions, file.deletions, True))
+                continue
+            target_size = _byte_len(file.header) + space_per_file
+            if file.size > target_size:
+                file.truncate(target_size)
+            included.append(file)
+    else:
+        # Strategy 2: headers alone overflow, so add whole files greedily in
+        # priority order and skip binary files entirely.
+        current_size = 0
+        for file in file_diffs:
+            if file.is_binary:
+                continue
+            if current_size + file.size <= effective_max:
+                current_size += file.size
+                included.append(file)
+            elif current_size < effective_max // 2 and file.priority(config) >= 50:
+                # Less than half the budget is used: squeeze in one truncated
+                # file (priority >= 50 only), then stop.
+                file.truncate(max(0, effective_max - current_size - 100))
+                included.append(file)
+                break
+    if not included:
+        return "Error: Could not include any files in the diff"
+    result = reconstruct_diff(included)
+    excluded_count = total_files - len(included)
+    if excluded_count > 0:
+        result += f"\n\n... ({excluded_count} files omitted) ..."
+    return result
+def truncate_diff_by_lines(diff: str, max_lines: int, config: object | None = None) -> str:
+    """Truncate a diff to a line budget, distributing lines by file priority."""
+    files = parse_diff(diff)
+    total_lines = sum(len(file.header.splitlines()) + len(file.content.splitlines()) for file in files)
+    if total_lines <= max_lines:
+        return diff
+    total_priority = sum(max(1, file.priority(config)) for file in files) or 1
+    result: list[str] = []
+    for file in files:
+        result.extend(file.header.splitlines())
+        content_lines = file.content.splitlines()
+        priority = max(1, file.priority(config))
+        allocated = max(5, int(max_lines * priority / total_priority))
+        if len(content_lines) <= allocated:
+            result.extend(content_lines)
+            if not content_lines:
+                result.append("")
+            continue
+        keep_start = allocated // 2
+        keep_end = allocated - keep_start
+        omitted = len(content_lines) - keep_start - keep_end
+        result.extend(content_lines[:keep_start])
+        result.append(f"[... {omitted} lines omitted ...]")
+        result.extend(content_lines[-keep_end:])
+    return "\n".join(result) + ("\n" if result else "")
+def classify_diff_whitespace(diff: str) -> WhitespaceReport:
+    """Classify a unified diff by whitespace-only versus substantive files."""
+    _, sections = _file_sections(diff)
+    report = WhitespaceReport()
+    for path, section in sections:
+        if _section_is_whitespace_only(section):
+            report.whitespace_only_files.append(path)
+        else:
+            report.has_substantive = True
+    return report
+def strip_whitespace_only_files(diff: str) -> str | None:
+    """Return diff without whitespace-only file sections, or None if unchanged."""
+    preamble, sections = _file_sections(diff)
+    if not sections:
+        return None
+    kept: list[str] = []
+    stripped_any = False
+    for _, section in sections:
+        if _section_is_whitespace_only(section):
+            stripped_any = True
+        else:
+            kept.append(section)
+    if not stripped_any or not kept:
+        return None
+    return preamble + "".join(kept)
+def _is_excluded(filename: str, config: object | None) -> bool:
+    excluded = getattr(config, "excluded_files", ())
+    return any(filename.endswith(str(pattern)) for pattern in excluded)
+def _file_section_starts(diff: str) -> list[int]:
+    starts: list[int] = []
+    search_from = 0
+    while True:
+        idx = diff.find("diff --git", search_from)
+        if idx == -1:
+            return starts
+        if idx == 0 or diff[idx - 1] == "\n":
+            starts.append(idx)
+        search_from = idx + len("diff --git")
+def _file_sections(diff: str) -> tuple[str, list[tuple[str, str]]]:
+    starts = _file_section_starts(diff)
+    if not starts:
+        return diff, []
+    preamble = diff[: starts[0]]
+    sections: list[tuple[str, str]] = []
+    for index, start in enumerate(starts):
+        end = starts[index + 1] if index + 1 < len(starts) else len(diff)
+        section = diff[start:end]
+        first_line = section.splitlines()[0] if section else ""
+        parts = first_line.split()
+        path = parts[3].removeprefix("b/") if len(parts) > 3 else "unknown"
+        sections.append((path, section))
+    return preamble, sections
+def _section_is_whitespace_only(section: str) -> bool:
+    added: list[str] = []
+    removed: list[str] = []
+    has_change = False
+    for line in section.splitlines():
+        if line.startswith(("Binary files", "rename from", "rename to", "copy from", "copy to")):
+            return False
+        if line.startswith(("+++", "---")):
+            continue
+        if line.startswith("+"):
+            has_change = True
+            added.extend(ch for ch in line[1:] if not ch.isspace())
+        elif line.startswith("-"):
+            has_change = True
+            removed.extend(ch for ch in line[1:] if not ch.isspace())
+    return has_change and added == removed

lgit/errors.py ADDED Viewed

@@ -0,0 +1,137 @@
+"""Shared exception hierarchy for lgit."""
+from __future__ import annotations
+from dataclasses import dataclass
+from pathlib import Path
+from typing import Any
+# Root of the hierarchy: every exception class defined in this module derives
+# from it, so one ``except LgitError`` clause catches them all.
+class LgitError(Exception):
+    """Base class for all expected lgit failures."""
+@dataclass(slots=True)
+class GitError(LgitError):
+    """A git subprocess or repository operation failed."""
+    message: str
+    def __str__(self) -> str:
+        return f"git: {self.message}"
+class GitIndexLocked(GitError):
+    """The repository index lock exists and prevents git operations."""
+    lock_path: Path
+    def __init__(self, lock_path: Path) -> None:
+        super().__init__("git index is locked")
+        self.lock_path = lock_path
+    def __str__(self) -> str:
+        return f"{self.message}: {self.lock_path}"
+@dataclass(slots=True)
+class ApiError(LgitError):
+    """An API request failed with a non-successful response."""
+    status: int
+    body: str
+    def __str__(self) -> str:
+        return f"API request failed (HTTP {self.status}): {self.body}"
+class ApiContextLengthExceeded(ApiError):
+    """The selected model could not fit the request in its context window."""
+    operation: str
+    model: str
+    def __init__(self, *, operation: str, model: str, status: int, body: str) -> None:
+        super().__init__(status=status, body=body)
+        self.operation = operation
+        self.model = model
+    def __str__(self) -> str:
+        return (
+            "API request exceeded the model context window during "
+            f"{self.operation} ({self.model}, HTTP {self.status}): {self.body}"
+        )
+@dataclass(slots=True)
+class ValidationFailure(LgitError):
+    """Domain validation rejected a value."""
+    message: str
+    field: str | None = None
+    value: Any | None = None
+    def __str__(self) -> str:
+        if self.field is None:
+            return self.message
+        return f"{self.field}: {self.message}"
+@dataclass(slots=True)
+class NoChanges(LgitError):
+    """No staged, unstaged, or compose changes were available to analyze."""
+    mode: str
+    def __str__(self) -> str:
+        return f"No changes found in {self.mode} mode"
+@dataclass(slots=True)
+class ConfigError(LgitError):
+    """Configuration loading or validation failed."""
+    message: str
+    def __str__(self) -> str:
+        return self.message
+# Adds no fields or methods of its own; everything is inherited from
+# ValidationFailure.
+class InvalidCommitType(ValidationFailure):
+    """A commit type token is not canonical and is not a known alias."""
+# Adds no fields or methods of its own; everything is inherited from
+# ValidationFailure.
+class InvalidScope(ValidationFailure):
+    """A conventional-commit scope has invalid syntax."""
+@dataclass(slots=True)
+class SummaryTooLong(ValidationFailure):
+    """A commit summary exceeded the configured hard limit."""
+    length: int = 0
+    max_length: int = 0
+    def __init__(self, length: int, max_length: int) -> None:
+        super().__init__(
+            f"summary too long: {length} chars (max {max_length})",
+            field="summary",
+            value=length,
+        )
+        self.length = length
+        self.max_length = max_length
+# Public API of this module: the names exported by ``from ... import *``.
+__all__ = [
+    "LgitError",
+    "GitError",
+    "GitIndexLocked",
+    "ApiError",
+    "ApiContextLengthExceeded",
+    "ValidationFailure",
+    "NoChanges",
+    "ConfigError",
+    "InvalidCommitType",
+    "InvalidScope",
+    "SummaryTooLong",
+]