PyPI - llm-commit-helper - Versions diffs - 0.1.0__py3-none-any.whl - Mend

llm-commit-helper 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (18) hide show

llm_commit_helper/__init__.py +7 -0
llm_commit_helper/__main__.py +10 -0
llm_commit_helper/cli.py +214 -0
llm_commit_helper/config.py +132 -0
llm_commit_helper/diff_engine.py +91 -0
llm_commit_helper/formatters/__init__.py +35 -0
llm_commit_helper/formatters/generic_fmt.py +47 -0
llm_commit_helper/formatters/python_fmt.py +81 -0
llm_commit_helper/formatters/verilog_fmt.py +116 -0
llm_commit_helper/git_staged.py +244 -0
llm_commit_helper/output.py +83 -0
llm_commit_helper/submodule.py +96 -0
llm_commit_helper/utils.py +124 -0
llm_commit_helper-0.1.0.dist-info/METADATA +287 -0
llm_commit_helper-0.1.0.dist-info/RECORD +18 -0
llm_commit_helper-0.1.0.dist-info/WHEEL +5 -0
llm_commit_helper-0.1.0.dist-info/entry_points.txt +2 -0
llm_commit_helper-0.1.0.dist-info/top_level.txt +1 -0

llm_commit_helper/__init__.py ADDED Viewed

@@ -0,0 +1,7 @@
+"""llm-commit-helper: LLM-friendly replacement for git diff --staged."""
+# Version string exposed by the package.
+__version__ = "0.1.0"
+# Local Variables:
+# eval: (blacken-mode)
+# End:

llm_commit_helper/__main__.py ADDED Viewed

@@ -0,0 +1,10 @@
+"""Entry point: python -m llm_commit_helper"""
+from llm_commit_helper.cli import main
+if __name__ == "__main__":
+    main()
+# Local Variables:
+# eval: (blacken-mode)
+# End:

llm_commit_helper/cli.py ADDED Viewed

@@ -0,0 +1,214 @@
+"""CLI entry point: argparse + main pipeline orchestration."""
+import argparse
+import sys
+from pathlib import Path
+from typing import Optional
+from llm_commit_helper.config import load_config, Config
+from llm_commit_helper.git_staged import (
+    get_staged_files,
+    classify_file,
+    load_file_contents,
+    FileKind,
+    FileStatus,
+)
+from llm_commit_helper.submodule import get_submodule_log, format_submodule_section
+from llm_commit_helper.formatters import format_diff
+from llm_commit_helper.output import OutputBuilder, format_file_header
+from llm_commit_helper.utils import find_git_root
+def _parse_args(argv: Optional[list[str]] = None) -> argparse.Namespace:
+    parser = argparse.ArgumentParser(
+        prog="llm-commit-helper",
+        description="LLM-friendly replacement for git diff --staged",
+    )
+    parser.add_argument(
+        "--config",
+        metavar="PATH",
+        help="Path to config.jsonc (overrides hierarchy search)",
+    )
+    parser.add_argument(
+        "--max-total-size",
+        metavar="SIZE",
+        help="Override max_total_size from config (e.g. 500, 20KB)",
+    )
+    parser.add_argument(
+        "--verbose",
+        "-v",
+        action="store_true",
+        help="Print extra diagnostic info to stderr",
+    )
+    return parser.parse_args(argv)
+def _make_summary_header(
+    files: list,
+    config: Config,
+    config_source: Optional[Path],
+) -> str:
+    counts: dict[str, int] = {
+        "modified": 0,
+        "added": 0,
+        "deleted": 0,
+        "excluded": 0,
+        "too_large": 0,
+        "submodule": 0,
+        "binary": 0,
+    }
+    for f in files:
+        kind_name = f.kind.value if f.kind else "modified"
+        if kind_name in counts:
+            counts[kind_name] += 1
+    total = len(files)
+    parts = []
+    for k, v in counts.items():
+        if v > 0:
+            parts.append(f"{v} {k}")
+    summary_line = f"Files: {total} total ({', '.join(parts)})"
+    cfg_line = f"Config: {config_source}" if config_source else "Config: defaults"
+    return f"=== Staged Changes Summary ===\n{summary_line}\n{cfg_line}\n\n"
+def _process_file(f, config: Config, git_root: Path, verbose: bool) -> tuple[str, str]:
+    """Process one staged file and return (header, body) strings."""
+    kind = f.kind
+    if kind == FileKind.EXCLUDED:
+        header = format_file_header(f.path, "excluded")
+        body = "[changed - excluded by rule]\n\n"
+        return header, body
+    if kind == FileKind.TOO_LARGE:
+        header = format_file_header(f.path, "too_large")
+        body = "[changed - file too large]\n\n"
+        return header, body
+    if kind == FileKind.BINARY:
+        header = format_file_header(f.path, "binary")
+        body = "[binary file changed]\n\n"
+        return header, body
+    if kind == FileKind.ADDED:
+        header = format_file_header(f.path, "added")
+        body = "[new file - contents not shown]\n\n"
+        return header, body
+    if kind == FileKind.DELETED:
+        header = format_file_header(f.path, "deleted")
+        body = "[file deleted]\n\n"
+        return header, body
+    if kind == FileKind.MODIFIED:
+        f = load_file_contents(f, git_root)
+        if f.old_content is None and f.new_content is None:
+            header = format_file_header(f.path, "modified")
+            body = "[could not retrieve file content]\n\n"
+            return header, body
+        diff_text, is_fmt_only = format_diff(f.path, f.old_content, f.new_content)
+        if is_fmt_only and not diff_text:
+            header = format_file_header(f.path, "modified", "formatting-only")
+            body = "[no logic changes - formatting only]\n\n"
+            return header, body
+        if is_fmt_only:
+            header = format_file_header(f.path, "modified", "formatting-only")
+        else:
+            header = format_file_header(f.path, "modified")
+        body = (diff_text or "[empty diff]") + "\n\n"
+        return header, body
+    # Submodule handled separately
+    return "", ""
+def main(argv: Optional[list[str]] = None) -> int:
+    args = _parse_args(argv)
+    git_root = find_git_root()
+    if git_root is None:
+        print("[llm-commit-helper] Error: not inside a git repository", file=sys.stderr)
+        return 1
+    config_path = Path(args.config) if args.config else None
+    config = load_config(config_path, start=git_root)
+    # Apply CLI override for max_total_size
+    if args.max_total_size:
+        from llm_commit_helper.utils import parse_size
+        config = Config(
+            exclude_patterns=config.exclude_patterns,
+            max_file_size=config.max_file_size,
+            max_total_size=parse_size(args.max_total_size),
+            source=config.source,
+        )
+    if args.verbose:
+        print(
+            f"[llm-commit-helper] git root: {git_root}",
+            file=sys.stderr,
+        )
+        print(
+            f"[llm-commit-helper] config: {config.source or 'defaults'}",
+            file=sys.stderr,
+        )
+        print(
+            f"[llm-commit-helper] max_total_size: {config.max_total_size}",
+            file=sys.stderr,
+        )
+    # Get and classify staged files
+    staged = get_staged_files(git_root)
+    if not staged:
+        print("No staged changes.", file=sys.stderr)
+        return 0
+    for f in staged:
+        f.kind = classify_file(f, config, git_root)
+    header = _make_summary_header(staged, config, config.source)
+    builder = OutputBuilder(config.max_total_size)
+    # Process regular files
+    for f in staged:
+        if f.kind == FileKind.SUBMODULE:
+            continue  # handle submodules after
+        file_header, file_body = _process_file(f, config, git_root, args.verbose)
+        section = file_header + file_body
+        if not builder.add_section(section, file_path=f.path):
+            if args.verbose:
+                print(
+                    f"[llm-commit-helper] Budget exceeded, truncating at {f.path}",
+                    file=sys.stderr,
+                )
+    # Process submodules
+    for f in staged:
+        if f.kind != FileKind.SUBMODULE:
+            continue
+        log_lines = get_submodule_log(f.path, f.old_hash, f.new_hash, git_root)
+        section = format_submodule_section(f.path, f.old_hash, f.new_hash, log_lines) + "\n\n"
+        builder.add_section(section, file_path=f.path)
+    output = builder.build(
+        header=header,
+        footer_template="\n=== End of Staged Changes ({total_chars} chars) ===\n",
+    )
+    print(output)
+    return 0
+# Local Variables:
+# eval: (blacken-mode)
+# End:

llm_commit_helper/config.py ADDED Viewed

@@ -0,0 +1,132 @@
+"""Configuration loading: JSONC parsing, hierarchical search, Config dataclass."""
+import json
+import re
+import sys
+from dataclasses import dataclass, field
+from pathlib import Path
+from typing import Optional
+from llm_commit_helper.utils import find_git_root, parse_size
+# Built-in limits used when a config file does not supply its own values.
+DEFAULT_MAX_FILE_SIZE = parse_size("200MB")
+DEFAULT_MAX_TOTAL_SIZE = 20000
+# File name looked up in each searched directory, either directly or inside
+# a CONFIG_DIR_NAME subdirectory.
+CONFIG_FILENAME = "config.jsonc"
+CONFIG_DIR_NAME = ".llm-commit-helper"
+# Per-user location tried last, after the directory walk.
+GLOBAL_CONFIG_DIR = Path.home() / ".config" / "llm-commit-helper"
+@dataclass
+class Config:
+    """Settings for one run: exclusion patterns, size limits and their origin."""
+    # Patterns taken from rules.exclude in the config file; empty by default.
+    exclude_patterns: list[str] = field(default_factory=list)
+    # Per-file limit as produced by parse_size (unit presumably bytes - confirm in utils).
+    max_file_size: int = DEFAULT_MAX_FILE_SIZE
+    # Overall limit as produced by parse_size, or the built-in default.
+    max_total_size: int = DEFAULT_MAX_TOTAL_SIZE
+    source: Optional[Path] = None  # which file was loaded, None = defaults
+def _strip_jsonc_comments(text: str) -> str:
+    """Remove // line comments and trailing commas from JSONC text."""
+    # Remove // comments (not inside strings)
+    result = []
+    in_string = False
+    i = 0
+    while i < len(text):
+        ch = text[i]
+        if ch == '"' and (i == 0 or text[i - 1] != "\\"):
+            in_string = not in_string
+            result.append(ch)
+        elif ch == "/" and not in_string and i + 1 < len(text) and text[i + 1] == "/":
+            # Skip to end of line
+            while i < len(text) and text[i] != "\n":
+                i += 1
+            continue
+        else:
+            result.append(ch)
+        i += 1
+    stripped = "".join(result)
+    # Remove trailing commas before } or ]
+    stripped = re.sub(r",\s*([}\]])", r"\1", stripped)
+    return stripped
+def _parse_jsonc(text: str) -> dict:
+    """Parse JSONC text into a Python dict."""
+    clean = _strip_jsonc_comments(text)
+    return json.loads(clean)
+def _load_config_file(path: Path) -> Optional[Config]:
+    """Load and validate a config file. Returns None if file doesn't exist."""
+    if not path.exists():
+        return None
+    try:
+        raw = path.read_text(encoding="utf-8")
+        data = _parse_jsonc(raw)
+    except (json.JSONDecodeError, OSError) as e:
+        print(
+            f"[llm-commit-helper] Warning: failed to parse {path}: {e}",
+            file=sys.stderr,
+        )
+        return None
+    rules = data.get("rules", {})
+    exclude = rules.get("exclude", [])
+    max_file_size_raw = rules.get("max_file_size", DEFAULT_MAX_FILE_SIZE)
+    max_total_size_raw = rules.get("max_total_size", DEFAULT_MAX_TOTAL_SIZE)
+    return Config(
+        exclude_patterns=list(exclude),
+        max_file_size=parse_size(max_file_size_raw),
+        max_total_size=parse_size(max_total_size_raw),
+        source=path,
+    )
+def _candidate_paths(start: Optional[Path] = None) -> list[Path]:
+    """Return config file candidates from cwd up to git root, then global."""
+    current = (start or Path.cwd()).resolve()
+    git_root = find_git_root(current)
+    candidates: list[Path] = []
+    # Walk from cwd up to git root (or filesystem root)
+    for parent in [current, *current.parents]:
+        candidates.append(parent / CONFIG_FILENAME)
+        candidates.append(parent / CONFIG_DIR_NAME / CONFIG_FILENAME)
+        if git_root and parent == git_root:
+            break
+    # Global config
+    candidates.append(GLOBAL_CONFIG_DIR / CONFIG_FILENAME)
+    return candidates
+def load_config(
+    config_path: Optional[Path] = None,
+    start: Optional[Path] = None,
+) -> Config:
+    """Load configuration. If config_path given, use it. Otherwise search hierarchy."""
+    if config_path is not None:
+        cfg = _load_config_file(config_path)
+        if cfg is None:
+            print(
+                f"[llm-commit-helper] Warning: config file not found: {config_path}",
+                file=sys.stderr,
+            )
+            return Config()
+        return cfg
+    for candidate in _candidate_paths(start):
+        cfg = _load_config_file(candidate)
+        if cfg is not None:
+            return cfg
+    return Config()  # defaults
+# Local Variables:
+# eval: (blacken-mode)
+# End:

llm_commit_helper/diff_engine.py ADDED Viewed

@@ -0,0 +1,91 @@
+"""Diff engine: difflib wrapper with per-hunk formatting annotation."""
+import difflib
+from typing import Callable
+def make_unified_diff(
+    old_lines: list[str],
+    new_lines: list[str],
+    fromfile: str = "old",
+    tofile: str = "new",
+    n: int = 3,
+) -> list[str]:
+    """Produce unified diff lines (with headers) using difflib."""
+    return list(
+        difflib.unified_diff(
+            old_lines,
+            new_lines,
+            fromfile=fromfile,
+            tofile=tofile,
+            n=n,
+        )
+    )
+def _split_hunks(
+    diff_lines: list[str],
+) -> tuple[list[str], list[tuple[int, int]]]:
+    """Split unified diff into header lines and list of (start, end) hunk spans."""
+    hunk_spans: list[tuple[int, int]] = []
+    hunk_start = None
+    for i, line in enumerate(diff_lines):
+        if line.startswith("@@"):
+            if hunk_start is not None:
+                hunk_spans.append((hunk_start, i))
+            hunk_start = i
+    if hunk_start is not None:
+        hunk_spans.append((hunk_start, len(diff_lines)))
+    return hunk_spans
+def annotate_formatting_hunks(
+    old_lines: list[str],
+    new_lines: list[str],
+    is_formatting_only: Callable[[list[str], list[str]], bool],
+    n: int = 3,
+) -> tuple[list[str], bool]:
+    """Produce annotated unified diff lines.
+    For each hunk that is formatting-only, inserts a [formatting-only] marker
+    after the @@ line.
+    Returns (diff_lines, all_hunks_are_formatting_only).
+    """
+    raw = make_unified_diff(old_lines, new_lines, n=n)
+    if not raw:
+        return [], True
+    hunk_spans = _split_hunks(raw)
+    if not hunk_spans:
+        return raw, False
+    result: list[str] = []
+    all_formatting = True
+    # Copy header lines (before first hunk)
+    first_hunk_start = hunk_spans[0][0]
+    result.extend(raw[:first_hunk_start])
+    for start, end in hunk_spans:
+        hunk_body = raw[start:end]
+        removed = [l[1:] for l in hunk_body if l.startswith("-") and not l.startswith("---")]
+        added = [l[1:] for l in hunk_body if l.startswith("+") and not l.startswith("+++")]
+        fmt_only = is_formatting_only(removed, added)
+        if not fmt_only:
+            all_formatting = False
+        result.append(hunk_body[0])  # the @@ line
+        if fmt_only:
+            result.append("[formatting-only]\n")
+        result.extend(hunk_body[1:])
+    return result, all_formatting
+# Local Variables:
+# eval: (blacken-mode)
+# End:

llm_commit_helper/formatters/__init__.py ADDED Viewed

@@ -0,0 +1,35 @@
+"""Formatter dispatcher: selects the right formatter by file extension."""
+from pathlib import Path
+from typing import Optional
+def format_diff(
+    path: str,
+    old_content: Optional[str],
+    new_content: Optional[str],
+) -> tuple[str, bool]:
+    """Format a diff for the given file path.
+    Returns (diff_text, is_formatting_only).
+    is_formatting_only=True means no logic changes were found.
+    """
+    ext = Path(path).suffix.lower()
+    if ext == ".py":
+        from llm_commit_helper.formatters.python_fmt import format_python_diff
+        return format_python_diff(path, old_content, new_content)
+    elif ext in (".v", ".sv"):
+        from llm_commit_helper.formatters.verilog_fmt import format_verilog_diff
+        return format_verilog_diff(path, old_content, new_content)
+    else:
+        from llm_commit_helper.formatters.generic_fmt import format_generic_diff
+        return format_generic_diff(path, old_content, new_content)
+# Local Variables:
+# eval: (blacken-mode)
+# End:

llm_commit_helper/formatters/generic_fmt.py ADDED Viewed

@@ -0,0 +1,47 @@
+"""Generic formatter: per-hunk whitespace normalization."""
+import difflib
+import re
+from typing import Optional
+from llm_commit_helper.diff_engine import make_unified_diff, annotate_formatting_hunks
+def _normalize_line(line: str) -> str:
+    """Strip whitespace and normalize runs of spaces/tabs."""
+    return re.sub(r"\s+", " ", line.strip())
+def _hunk_is_formatting_only(removed: list[str], added: list[str]) -> bool:
+    """Return True if hunk differences are purely whitespace."""
+    norm_removed = [_normalize_line(l) for l in removed]
+    norm_added = [_normalize_line(l) for l in added]
+    return norm_removed == norm_added
+def format_generic_diff(
+    path: str,
+    old_content: Optional[str],
+    new_content: Optional[str],
+) -> tuple[str, bool]:
+    """Produce a unified diff with formatting-only hunk annotations.
+    Returns (diff_text, is_formatting_only).
+    """
+    old_lines = (old_content or "").splitlines(keepends=True)
+    new_lines = (new_content or "").splitlines(keepends=True)
+    diff_lines, all_formatting = annotate_formatting_hunks(
+        old_lines, new_lines, _hunk_is_formatting_only
+    )
+    if not diff_lines:
+        return "", True  # no diff at all
+    diff_text = "".join(diff_lines)
+    return diff_text, all_formatting
+# Local Variables:
+# eval: (blacken-mode)
+# End:

llm_commit_helper/formatters/python_fmt.py ADDED Viewed

@@ -0,0 +1,81 @@
+"""Python formatter: use black to separate logic from formatting changes."""
+import sys
+from typing import Optional
+from llm_commit_helper.utils import make_temp_file, run_command
+from llm_commit_helper.diff_engine import annotate_formatting_hunks
+from llm_commit_helper.formatters.generic_fmt import _hunk_is_formatting_only
+def _run_black(path_str: str) -> bool:
+    """Run black --quiet on the given file path. Return True on success."""
+    rc, _, err = run_command(["black", "--quiet", path_str])
+    if rc != 0:
+        return False
+    return True
+def format_python_diff(
+    path: str,
+    old_content: Optional[str],
+    new_content: Optional[str],
+) -> tuple[str, bool]:
+    """Format Python diff using black to isolate logic changes.
+    Returns (diff_text, is_formatting_only).
+    Falls back to generic diff if black is not available.
+    """
+    old_tmp = None
+    new_tmp = None
+    try:
+        old_tmp = make_temp_file(suffix=".py", content=old_content or "")
+        new_tmp = make_temp_file(suffix=".py", content=new_content or "")
+        old_ok = _run_black(str(old_tmp))
+        new_ok = _run_black(str(new_tmp))
+        if not old_ok or not new_ok:
+            print(
+                f"[llm-commit-helper] black not available or failed for {path}, falling back to generic",
+                file=sys.stderr,
+            )
+            from llm_commit_helper.formatters.generic_fmt import format_generic_diff
+            return format_generic_diff(path, old_content, new_content)
+        old_formatted = old_tmp.read_text(encoding="utf-8")
+        new_formatted = new_tmp.read_text(encoding="utf-8")
+        old_lines = old_formatted.splitlines(keepends=True)
+        new_lines = new_formatted.splitlines(keepends=True)
+        diff_lines, all_formatting = annotate_formatting_hunks(
+            old_lines, new_lines, _hunk_is_formatting_only
+        )
+        if not diff_lines:
+            return "", True  # identical after formatting
+        # Check if logic diff is empty (formatted versions match)
+        # If the only differences are in the raw diff (pre-formatting), it's formatting-only
+        raw_old = (old_content or "").splitlines(keepends=True)
+        raw_new = (new_content or "").splitlines(keepends=True)
+        raw_diff_lines, _ = annotate_formatting_hunks(raw_old, raw_new, _hunk_is_formatting_only)
+        if not diff_lines and raw_diff_lines:
+            return "[all changes are formatting-only (black normalization)]", True
+        diff_text = "".join(diff_lines)
+        return diff_text, all_formatting
+    finally:
+        if old_tmp and old_tmp.exists():
+            old_tmp.unlink()
+        if new_tmp and new_tmp.exists():
+            new_tmp.unlink()
+# Local Variables:
+# eval: (blacken-mode)
+# End: