PyPI - git-explain - Versions diffs - 1.1.0__py3-none-any.whl - Mend

git-explain 1.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (12) hide show

git_explain/__init__.py +1 -0
git_explain/cli.py +481 -0
git_explain/gemini.py +324 -0
git_explain/git.py +170 -0
git_explain/heuristics.py +123 -0
git_explain/run.py +54 -0
git_explain-1.1.0.dist-info/METADATA +143 -0
git_explain-1.1.0.dist-info/RECORD +12 -0
git_explain-1.1.0.dist-info/WHEEL +5 -0
git_explain-1.1.0.dist-info/entry_points.txt +2 -0
git_explain-1.1.0.dist-info/licenses/LICENSE +201 -0
git_explain-1.1.0.dist-info/top_level.txt +1 -0

git_explain/gemini.py ADDED Viewed

@@ -0,0 +1,324 @@
+"""Suggest git add and commit from diff using Google Gemini."""
+import os
+import re
+import time
+from dataclasses import dataclass
+from google import genai
+from google.genai import types
+# System instruction used by suggest_commands() when with_diff is False.
+# The model only sees "<STATUS> <PATH>" lines and is asked to answer with exactly
+# two lines: a "git add ..." line and a bracketed git commit -m "[TYPE] Message" line.
+SYSTEM_PROMPT = """You are given a list of changed/added files under ## Staged, ## Unstaged, ## Untracked.
+Each file line is: <STATUS> <PATH> where STATUS is one of:
+- A = added/new file
+- M = modified
+- D = deleted
+- R = renamed
+- C = copied
+Suggest one commit that includes ALL of these files.
+Rules:
+1. Line 1 must be: git add <path1> <path2> ... with EVERY PATH from the list (all sections). Do not omit any file. Do not truncate. Do not include status letters.
+2. Line 2 must be: git commit -m "[TYPE] Message" with TYPE one of: FEAT, FIX, DOCS, REFACTOR, TEST.
+3. The message must be a short, specific summary of what the change does based on the file names (e.g. "Add README and feature status doc", "Fix Gemini model and add file-list mode"). Never use only generic words like "update", "changes", or "refactor" by themselves—always add what was updated (e.g. "Update docs and CLI prompt").
+4. Use imperative, no period at end. Maximum one short line.
+Example for files README.md, FEATURES.md, git_explain/gemini.py:
+git add README.md FEATURES.md git_explain/gemini.py
+git commit -m "[DOCS] Add README and FEATURES doc, tune Gemini prompt"
+"""
+# System instruction used by suggest_commands() when with_diff is True.
+# The model additionally receives the diff text and is asked for a conventional-commit
+# style line (git commit -m "type: subject") instead of the bracketed form.
+SYSTEM_PROMPT_WITH_DIFF = """You are given:
+1. A list of changed/added files (## Staged, ## Unstaged, ## Untracked) with <STATUS> <PATH>.
+2. The full diff (## Staged diff, ## Unstaged diff, ## Untracked) showing exact code changes.
+Use the diff to write a specific, detailed commit message. Do not use generic words like "update" or "changes"—describe what actually changed (e.g. "add opt-in --with-diff to send full diff to LLM for detailed messages", "tweak commit message edit flow to show suggestion before prompting to edit").
+Output format (conventional commits style):
+- Line 1: git add <path1> <path2> ... with EVERY path from the file list. Do not omit any.
+- Line 2: git commit -m "type: subject" where type is exactly one of: feat, fix, docs, refactor, test.
+  The subject must be a short, specific summary in imperative mood, no period at end (e.g. "feat: allow editing commit message before apply", "fix: parse conventional commit line from AI").
+Example:
+git add git_explain/cli.py git_explain/gemini.py
+git commit -m "feat: add opt-in --with-diff for detailed AI commit messages"
+"""
+# Matches the model's "git add <paths...>" line; group 1 is the raw, space-separated path list.
+ADD_LINE_RE = re.compile(r"git\s+add\s+(.+)", re.IGNORECASE)
+# Bracketed form: git commit -m "[TYPE] Message". Group 1 is the type, group 2 the message.
+# The prompts ask the model for TEST, so the singular must match; the plural TESTS is still
+# accepted for backward compatibility.
+# The message group is greedy so that it runs to the LAST quote on the line: a non-greedy
+# match would cut "Don't crash" at the apostrophe.
+COMMIT_LINE_RE = re.compile(
+    r'git\s+commit\s+-m\s+["\']\[(FEAT|FIX|DOCS|REFACTOR|TESTS?)\]\s*(.+)["\']',
+    re.IGNORECASE,
+)
+# Conventional form: git commit -m "type: subject" (e.g. "feat: subject", "test: subject").
+# Same group layout and the same test/tests and quoting rules as COMMIT_LINE_RE.
+COMMIT_LINE_CONVENTIONAL_RE = re.compile(
+    r'git\s+commit\s+-m\s+["\'](feat|fix|docs|refactor|tests?)\s*:\s*(.+)["\']',
+    re.IGNORECASE,
+)
+# Model used when neither the caller nor the GEMINI_MODEL environment variable names one.
+DEFAULT_MODEL = "gemini-2.5-flash"
+# Lowercase one-word messages that carry no information on their own.
+_GENERIC_MESSAGES = {
+    "update",
+    "updates",
+    "change",
+    "changes",
+    "refactor",
+    "refactoring",
+    "fix",
+    "fixes",
+    "docs",
+    "documentation",
+    "test",
+    "tests",
+    "misc",
+}
+def _is_generic_message(message: str) -> bool:
+    """Return True when *message* is too vague to use as a commit subject.
+    The text is stripped and lowercased before checking. It counts as generic when it
+    is empty or exactly one of ``_GENERIC_MESSAGES``. Text that starts with a filler
+    verb (update/change/refactor/misc) followed by more words is accepted regardless
+    of length; any other text is generic when shorter than 12 characters.
+    """
+    normalized = (message or "").strip().lower()
+    if not normalized or normalized in _GENERIC_MESSAGES:
+        return True
+    starts_with_filler = re.fullmatch(
+        r"(update|updates|change|changes|refactor|refactoring|misc)(\s+.+)?", normalized
+    )
+    if starts_with_filler:
+        # The bare verb was already rejected above, so only the word count matters here.
+        return len(normalized.split()) < 2
+    return len(normalized) < 12
+def _fallback_type_and_message(files: list[str]) -> tuple[str, str]:
+    """Legacy wrapper: build a fallback (commit_type, message) from paths alone.
+    Delegates to ``_fallback_type_and_message_with_context`` with the assumption that
+    nothing was added and the repository already has commits.
+    """
+    legacy_context = {"added_any": False, "has_commits": True}
+    return _fallback_type_and_message_with_context(files=files, **legacy_context)
+def _fallback_type_and_message_with_context(
+    *,
+    files: list[str],
+    added_any: bool,
+    has_commits: bool | None,
+) -> tuple[str, str]:
+    """Derive a (commit_type, message) pair from file paths alone.
+    Args:
+        files: Paths that will be committed; matching is done on their lowercase form.
+        added_any: True when at least one of the files is newly added.
+        has_commits: False for a repository without commits, True/None otherwise.
+    Returns:
+        ``(commit_type, message)`` where commit_type is "DOCS", "FEAT" or "REFACTOR"
+        and message is "<Add|Update> <up to three topics>", capped at 72 characters.
+    """
+    doc_exts = (".md", ".rst", ".txt")
+    doc_names = ("readme", "readme.md", "features.md")
+    packaging_names = ("pyproject.toml", "requirements.txt", "setup.cfg", "setup.py")
+    names = [name.lower() for name in files]
+    def looks_like_doc(name: str) -> bool:
+        suffix = os.path.splitext(name)[1].lower()
+        return suffix in doc_exts or name.endswith(doc_names)
+    def any_name(predicate) -> bool:
+        return any(predicate(name) for name in names)
+    has_docs = any_name(looks_like_doc)
+    only_docs = bool(files) and all(looks_like_doc(name) for name in names)
+    has_packaging = any_name(lambda name: name.endswith(packaging_names))
+    # New files, or a repository without any commit yet, read as "Add"; everything else as "Update".
+    verb = "Add" if (added_any or has_commits is False) else "Update"
+    if only_docs:
+        commit_type = "DOCS"
+    else:
+        commit_type = "FEAT" if verb == "Add" else "REFACTOR"
+    # Candidate topics in priority order; only the first three make it into the message.
+    candidates = [
+        ("README", any_name(lambda name: name.endswith(("readme.md", "readme")))),
+        ("FEATURES doc", any_name(lambda name: name.endswith("features.md"))),
+        ("docs", has_docs and not only_docs),
+        (
+            "git-explain CLI",
+            any_name(
+                lambda name: name.startswith("git_explain/") or "/git_explain/" in name
+            ),
+        ),
+        ("Gemini integration", any_name(lambda name: "git_explain/gemini.py" in name)),
+        ("change detection", any_name(lambda name: "git_explain/git.py" in name)),
+        ("CLI output", any_name(lambda name: "git_explain/cli.py" in name)),
+        ("packaging config", has_packaging),
+    ]
+    # dict.fromkeys drops duplicates while keeping first-seen order.
+    subjects = list(dict.fromkeys(label for label, hit in candidates if hit))
+    if not subjects:
+        subjects = ["project files"]
+    if len(subjects) == 1:
+        summary = f"{verb} {subjects[0]}"
+    elif len(subjects) == 2:
+        summary = f"{verb} {subjects[0]} and {subjects[1]}"
+    else:
+        summary = f"{verb} {subjects[0]}, {subjects[1]}, and {subjects[2]}"
+    if verb == "Add" and has_commits is False and summary.startswith("Add "):
+        # First commit of a repository: say "Add initial …" instead of plain "Add …".
+        summary = summary.replace("Add ", "Add initial ", 1)
+    summary = summary.strip().rstrip(".")
+    if len(summary) > 72:
+        summary = summary[:72].rstrip()
+    return commit_type, summary
+def _parse_changed_file_list(diff: str) -> tuple[list[tuple[str, str]], bool | None]:
+    """Parse the combined changed-file list into [(status, path)], plus has_commits if present.
+    The input is the sectioned text sent to the model: "## <Section>" headings followed
+    by "<STATUS> <PATH>" lines, plus an optional "## Meta" section holding
+    "has_commits: true|false".
+    Args:
+        diff: The file-list text (blank lines are ignored).
+    Returns:
+        ``(entries, has_commits)``. Each entry is ``(status, path)`` with an upper-case
+        one-letter status from A/M/D/R/C/U. Lines that carry no status letter are kept
+        as ``("M", line)`` for backward compatibility. ``has_commits`` is None when the
+        text has no valid "has_commits:" line in its Meta section.
+    """
+    entry_re = re.compile(r"^([AMDRCU])\s+(.+)$", re.IGNORECASE)
+    entries: list[tuple[str, str]] = []
+    section: str | None = None
+    has_commits: bool | None = None
+    for raw in diff.splitlines():
+        line = raw.strip()
+        if not line:
+            continue
+        if line.startswith("## "):
+            section = line[3:].strip()
+            continue
+        if section == "Meta":
+            # Meta lines are metadata, never file paths: read has_commits and skip
+            # everything else instead of letting it fall through as a bogus path.
+            if line.lower().startswith("has_commits:"):
+                v = line.split(":", 1)[1].strip().lower()
+                if v in ("true", "false"):
+                    has_commits = v == "true"
+            continue
+        m = entry_re.match(line)
+        if m:
+            entries.append((m.group(1).upper(), m.group(2).strip()))
+        else:
+            # Backward compatibility: treat as modified path
+            entries.append(("M", line))
+    return entries, has_commits
+@dataclass
+class Suggestion:
+    """A proposed ``git add`` / ``git commit`` pair."""
+    # Paths to stage with ``git add``.
+    add_args: list[str]
+    # Commit type label such as "FEAT", "FIX", "DOCS", "REFACTOR" or "TEST".
+    commit_type: str
+    # Commit subject text, without the type prefix.
+    commit_message: str
+def _get_client() -> genai.Client:
+    """Build a Gemini client from the first API key found in the environment.
+    ``GEMINI_API_KEY`` takes precedence over ``GOOGLE_API_KEY``; an unset or empty
+    value is skipped. Raises RuntimeError when neither variable provides a key.
+    """
+    for variable in ("GEMINI_API_KEY", "GOOGLE_API_KEY"):
+        candidate = os.environ.get(variable)
+        if candidate:
+            return genai.Client(api_key=candidate)
+    raise RuntimeError("Set GEMINI_API_KEY or GOOGLE_API_KEY in environment.")
+def suggest_commands(
+    diff: str, model: str | None = None, with_diff: bool = False
+) -> tuple[Suggestion | None, str]:
+    """Call Gemini with the file list (and optionally full diff); return (suggestion, raw_response). suggestion is None if unparseable.
+    Args:
+        diff: Sectioned file-list text, optionally followed by diff content.
+        model: Model name; falls back to the GEMINI_MODEL environment variable, then DEFAULT_MODEL.
+        with_diff: Selects SYSTEM_PROMPT_WITH_DIFF, a larger output budget, and conventional-commit parsing.
+    Returns:
+        ``(suggestion, raw_response)``. For empty input the result is ``(None, "")`` and no request is made.
+    Raises:
+        RuntimeError: From _get_client() when no API key is configured.
+        Exception: Whatever the API call raises, after at most one retry for rate-limit/quota errors.
+    """
+    if not diff or not diff.strip():
+        return None, ""
+    model = model or os.environ.get("GEMINI_MODEL") or DEFAULT_MODEL
+    system_instruction = SYSTEM_PROMPT_WITH_DIFF if with_diff else SYSTEM_PROMPT
+    client = _get_client()
+    last_err = None
+    # At most two attempts: the only retry happens after a rate-limit/quota style error.
+    for attempt in range(2):
+        try:
+            response = client.models.generate_content(
+                model=model,
+                contents=diff.strip(),
+                config=types.GenerateContentConfig(
+                    system_instruction=system_instruction,
+                    temperature=0.2,
+                    max_output_tokens=512 if with_diff else 256,
+                ),
+            )
+            break
+        except Exception as e:
+            last_err = e
+            err_str = str(e).lower()
+            # Rate limiting is detected from the error text, on the first attempt only.
+            if attempt == 0 and (
+                "429" in err_str
+                or "resource_exhausted" in err_str
+                or "quota" in err_str
+            ):
+                # Default back-off is 15s; a "retry in Ns" hint in the error overrides it, clamped to 5..60s.
+                wait = 15
+                if "retry in " in err_str:
+                    m = re.search(
+                        r"retry in (\d+(?:\.\d+)?)\s*s", err_str, re.IGNORECASE
+                    )
+                    if m:
+                        wait = min(60, max(5, int(float(m.group(1)) + 1)))
+                time.sleep(wait)
+                continue
+            raise
+    else:
+        # for/else: only runs when the loop ends without ``break``; kept as a defensive guard.
+        if last_err is not None:
+            raise last_err
+        raise RuntimeError("Unexpected state in suggest_commands")
+    text = (response.text or "").strip()
+    # ``raw`` keeps the untouched model answer for the caller; ``text`` is cleaned up for parsing.
+    raw = text
+    # Strip markdown code block if present
+    if text.startswith("```"):
+        lines = text.split("\n")
+        if lines[0].startswith("```"):
+            lines = lines[1:]
+        if lines and lines[-1].strip() == "```":
+            lines = lines[:-1]
+        text = "\n".join(lines)
+    lines = [ln.strip() for ln in text.splitlines() if ln.strip()]
+    add_args: list[str] = []
+    # Placeholders used when no commit line is recognized; "update" is later treated as generic.
+    commit_type = "REFACTOR"
+    commit_message = "update"
+    for line in lines:
+        add_m = ADD_LINE_RE.match(line)
+        if add_m:
+            add_args = [f.strip() for f in add_m.group(1).split() if f.strip()]
+            continue
+        # The conventional "type: subject" form is only tried in with_diff mode;
+        # the bracketed "[TYPE] Message" form below is tried in both modes.
+        commit_m = COMMIT_LINE_CONVENTIONAL_RE.match(line) if with_diff else None
+        if commit_m:
+            commit_type = commit_m.group(1).upper()
+            commit_message = commit_m.group(2).strip().rstrip(".")
+            break
+        commit_m = COMMIT_LINE_RE.match(line)
+        if commit_m:
+            commit_type = commit_m.group(1).upper()
+            commit_message = commit_m.group(2).strip().rstrip(".")
+            break
+    # Without a "git add" line the answer is considered unparseable.
+    if not add_args or not commit_message:
+        return None, raw
+    header_only = diff
+    if with_diff:
+        # NOTE(review): assumes the caller separates the file list from the diff body
+        # with a "## Diff" heading — confirm against the code that assembles the payload.
+        header_only = diff.split("\n## Diff", 1)[0]
+    entries, has_commits = _parse_changed_file_list(header_only.strip())
+    all_paths = [p for _, p in entries]
+    added_any = any(s == "A" for s, _ in entries)
+    # Always use the full path list we sent (model may truncate or omit)
+    if all_paths:
+        add_args = all_paths
+    # If we're adding new files (or this is an initial commit), don't label it REFACTOR
+    docs_only = all_paths and all(
+        os.path.splitext(p)[1].lower() in {".md", ".rst", ".txt"} for p in all_paths
+    )
+    if (added_any or has_commits is False) and commit_type == "REFACTOR":
+        commit_type = "DOCS" if docs_only else "FEAT"
+    # A vague model message is replaced by one derived from the file names.
+    if _is_generic_message(commit_message):
+        commit_type, commit_message = _fallback_type_and_message_with_context(
+            files=add_args, added_any=added_any, has_commits=has_commits
+        )
+    return Suggestion(
+        add_args=add_args, commit_type=commit_type, commit_message=commit_message
+    ), raw

git_explain/git.py ADDED Viewed

@@ -0,0 +1,170 @@
+"""Capture git diffs (staged and unstaged)."""
+import subprocess
+from pathlib import Path
+def get_repo_root(cwd: str | Path | None = None) -> Path:
+    """Locate the top-level directory of the git work tree containing *cwd*.
+    Runs ``git rev-parse --show-toplevel`` in *cwd* (the current directory when not
+    given). Raises RuntimeError when git exits with a non-zero status.
+    """
+    command = ["git", "rev-parse", "--show-toplevel"]
+    proc = subprocess.run(command, cwd=cwd or ".", capture_output=True, text=True)
+    if proc.returncode == 0:
+        return Path(proc.stdout.strip())
+    raise RuntimeError("Not a git repository (or any of the parent directories).")
+def ensure_git_repo(cwd: str | Path | None = None) -> Path:
+    """Verify that *cwd* lies inside a git work tree and return the repository root.
+    Raises RuntimeError unless ``git rev-parse --is-inside-work-tree`` succeeds and
+    prints "true".
+    """
+    probe = subprocess.run(
+        ["git", "rev-parse", "--is-inside-work-tree"],
+        cwd=cwd or ".",
+        capture_output=True,
+        text=True,
+    )
+    inside_work_tree = probe.returncode == 0 and probe.stdout.strip().lower() == "true"
+    if not inside_work_tree:
+        raise RuntimeError("Not a git repository (or any of the parent directories).")
+    return get_repo_root(cwd)
+def repo_has_commits(cwd: str | Path | None = None) -> bool:
+    """Tell whether HEAD resolves to a commit, i.e. the repository has at least one commit."""
+    verify_head = ["git", "rev-parse", "--verify", "HEAD"]
+    proc = subprocess.run(
+        verify_head, cwd=get_repo_root(cwd), capture_output=True, text=True
+    )
+    return proc.returncode == 0
+def _name_status(
+    args: list[str], cwd: str | Path | None = None
+) -> list[tuple[str, str]]:
+    """Run ``git <args>`` from the repository root and parse its --name-status output.
+    Each output line becomes ``(code, path)``: *code* is the first character of the
+    status field in upper case (so "R100" becomes "R") and *path* is the last field,
+    which for rename/copy lines is the new path. Returns an empty list when git fails
+    or prints nothing.
+    """
+    def parse(line: str) -> tuple[str, str] | None:
+        # Usual shape is tab-separated: "M\tpath" or "R100\told\tnew".
+        fields = line.split("\t")
+        if len(fields) >= 2:
+            code = fields[0].strip()[:1].upper()
+            path = fields[-1].strip()
+            return (code, path) if code and path else None
+        # Rare fallback: whitespace-separated output.
+        words = line.split()
+        if len(words) >= 2:
+            return (words[0][:1].upper(), words[-1])
+        return None
+    proc = subprocess.run(
+        ["git", *args], cwd=get_repo_root(cwd), capture_output=True, text=True
+    )
+    if proc.returncode != 0 or not proc.stdout.strip():
+        return []
+    stripped = (raw.strip() for raw in proc.stdout.splitlines())
+    parsed = (parse(line) for line in stripped if line)
+    return [pair for pair in parsed if pair is not None]
+def get_staged_changes(cwd: str | Path | None = None) -> list[tuple[str, str]]:
+    """List ``(status, path)`` pairs for changes already in the index (``git diff --cached``)."""
+    staged_query = ["diff", "--cached", "--name-status"]
+    return _name_status(staged_query, cwd=cwd)
+def get_unstaged_changes(cwd: str | Path | None = None) -> list[tuple[str, str]]:
+    """List ``(status, path)`` pairs for tracked files changed but not yet staged (``git diff``)."""
+    unstaged_query = ["diff", "--name-status"]
+    return _name_status(unstaged_query, cwd=cwd)
+def get_untracked_changes(cwd: str | Path | None = None) -> list[tuple[str, str]]:
+    """List untracked, non-ignored files as ``("A", path)`` pairs.
+    Uses ``git ls-files --others --exclude-standard`` from the repository root and
+    returns an empty list when git fails or reports nothing.
+    """
+    listing = subprocess.run(
+        ["git", "ls-files", "--others", "--exclude-standard"],
+        cwd=get_repo_root(cwd),
+        capture_output=True,
+        text=True,
+    )
+    if listing.returncode != 0:
+        return []
+    found: list[tuple[str, str]] = []
+    for entry in listing.stdout.strip().splitlines():
+        name = entry.strip()
+        if name:
+            found.append(("A", name))
+    return found
+def get_combined_diff(cwd: str | Path | None = None) -> tuple[str, Path]:
+    """Build the sectioned file-list text and return it with the repository root.
+    The text always starts with a "## Meta" section carrying ``has_commits``; the
+    "## Staged", "## Unstaged" and "## Untracked" sections follow when non-empty,
+    each listing "<status> <path>" lines (no file contents).
+    """
+    root = ensure_git_repo(cwd)
+    has_commits = repo_has_commits(cwd=root)
+    sections = [f"## Meta\nhas_commits: {str(has_commits).lower()}"]
+    groups = (
+        ("Staged", get_staged_changes(cwd=root)),
+        ("Unstaged", get_unstaged_changes(cwd=root)),
+        ("Untracked", get_untracked_changes(cwd=root)),
+    )
+    for heading, changes in groups:
+        if changes:
+            listing = "\n".join(f"{status} {path}" for status, path in changes)
+            sections.append(f"## {heading}\n{listing}")
+    return "\n\n".join(sections), root
+def get_diff_for_paths(paths: list[str], cwd: str | Path | None = None) -> str:
+    """Collect the staged and unstaged diffs for *paths* into one text.
+    Sections are "## Staged diff", "## Unstaged diff" and, for every requested path
+    that is untracked, "## Untracked (new file): <path>" followed by the file content
+    (or a placeholder when it cannot be read). Returns "" for an empty path list.
+    """
+    if not paths:
+        return ""
+    root = get_repo_root(cwd)
+    chunks: list[str] = []
+    variants = (("## Staged diff", ["--cached"]), ("## Unstaged diff", []))
+    for heading, extra in variants:
+        proc = subprocess.run(
+            ["git", "diff", *extra, "--"] + paths,
+            capture_output=True,
+            text=True,
+            cwd=root,
+        )
+        if proc.returncode == 0 and proc.stdout.strip():
+            chunks.append(heading + "\n" + proc.stdout.strip())
+    untracked_paths = {path for _, path in get_untracked_changes(cwd=root)}
+    for candidate in paths:
+        if candidate not in untracked_paths:
+            continue
+        try:
+            # Undecodable bytes are replaced rather than raising.
+            text = (root / candidate).read_text(encoding="utf-8", errors="replace")
+            chunks.append(f"## Untracked (new file): {candidate}\n{text}")
+        except Exception:
+            chunks.append(f"## Untracked (new file): {candidate}\n<binary or unreadable>")
+    return "\n\n".join(chunks)

git_explain/heuristics.py ADDED Viewed

@@ -0,0 +1,123 @@
+"""Heuristic suggestions when AI is disabled or unavailable."""
+from __future__ import annotations
+import os
+from git_explain.gemini import Suggestion
+# Lowercase file extensions that _is_doc treats as documentation.
+DOC_EXTS = {".md", ".rst", ".txt"}
+# Hints that mark a path as test-related; consulted by _is_test.
+TEST_HINTS = ("test", "tests", "pytest", "unittest")
+# Lowercase base names that _is_config treats as configuration/project files.
+CONFIG_FILES = {
+    "pyproject.toml",
+    "requirements.txt",
+    "setup.cfg",
+    "setup.py",
+    ".gitignore",
+    "license",
+    "license.txt",
+    "license.md",
+}
+# Lowercase file extensions that _is_config treats as configuration.
+CONFIG_EXTS = {".toml", ".yml", ".yaml", ".json", ".ini", ".cfg", ".lock"}
+def _is_doc(path: str) -> bool:
+    """Return True for documentation files: a ``DOC_EXTS`` extension or a README/FEATURES base name."""
+    lowered = path.lower()
+    if os.path.splitext(lowered)[1] in DOC_EXTS:
+        return True
+    return os.path.basename(lowered) in {"readme", "readme.md", "features.md"}
+def _is_test(path: str) -> bool:
+    """Return True when *path* looks like a test file.
+    A path qualifies when it lives under a ``tests/`` directory, when its base name
+    follows a common test naming scheme (``test_*``, ``*_test.py``, ``*.spec.ts``,
+    ``*.spec.tsx``, ``conftest.py``), or when one of its words starts with an entry
+    of ``TEST_HINTS``. Matching is case-insensitive.
+    """
+    p = path.lower()
+    base = os.path.basename(p)
+    if p.startswith("tests/") or "/tests/" in p:
+        return True
+    if (
+        base.startswith("test_")
+        or base.endswith(("_test.py", ".spec.ts", ".spec.tsx"))
+        or base == "conftest.py"
+    ):
+        return True
+    # Compare hints against whole words (split on anything non-alphanumeric) instead
+    # of raw substrings, so "latest.py" or "contest/app.py" are not mistaken for tests
+    # while "testing/", "unittests/" and "foo_test.go" still match.
+    words = "".join(ch if ch.isalnum() else " " for ch in p).split()
+    hints = tuple(TEST_HINTS)
+    return any(word.startswith(hints) for word in words)
+def _is_config(path: str) -> bool:
+    """Return True for configuration files: a ``CONFIG_FILES`` base name or a ``CONFIG_EXTS`` extension."""
+    lowered = path.lower()
+    if os.path.basename(lowered) in CONFIG_FILES:
+        return True
+    _, extension = os.path.splitext(lowered)
+    return extension in CONFIG_EXTS
+def suggest_from_changes(
+    *,
+    changes: list[tuple[str, str]],
+    has_commits: bool | None,
+) -> Suggestion:
+    """Build a Suggestion from ``[(status, path)]`` using file-name heuristics only.
+    Args:
+        changes: Status/path pairs; every path ends up in ``add_args``.
+        has_commits: False for a repository without commits, which is treated like an addition.
+    Returns:
+        A Suggestion whose type is DOCS, TEST, CHORE, FEAT or REFACTOR and whose
+        message is "<Add|Update> <up to three topics>".
+    """
+    paths = [path for _, path in changes]
+    added_any = any(status.upper() == "A" for status, _ in changes) or has_commits is False
+    doc_paths = {p for p in paths if _is_doc(p)}
+    test_paths = {p for p in paths if _is_test(p)}
+    config_paths = {p for p in paths if _is_config(p)}
+    non_docs = [p for p in paths if p not in doc_paths]
+    docs_only = bool(paths) and not non_docs
+    # "Mostly tests/config" means at least 60% of the non-doc paths are tests or config files.
+    supporting = sum(1 for p in non_docs if p in test_paths or p in config_paths)
+    mostly_tests_or_config = bool(non_docs) and supporting / max(1, len(non_docs)) >= 0.6
+    verb = "Add" if added_any else "Update"
+    if docs_only:
+        commit_type = "DOCS"
+    elif mostly_tests_or_config:
+        # CHORE only for config without tests; tests alone or tests plus config count as TEST.
+        config_without_tests = bool(config_paths) and not test_paths
+        commit_type = "CHORE" if config_without_tests else "TEST"
+    else:
+        commit_type = "FEAT" if added_any else "REFACTOR"
+    base_names = [os.path.basename(p).lower() for p in paths]
+    candidates = [
+        ("README", any(name in {"readme.md", "readme"} for name in base_names)),
+        ("FEATURES doc", any(name == "features.md" for name in base_names)),
+        ("tests", bool(test_paths)),
+        ("config", bool(config_paths)),
+        (
+            "git-explain CLI",
+            any("git_explain/" in p.replace("\\", "/").lower() for p in paths),
+        ),
+    ]
+    # dict.fromkeys drops duplicates while preserving order.
+    topics = list(dict.fromkeys(label for label, hit in candidates if hit))
+    if not topics:
+        topics = ["changes"]
+    if len(topics) == 1:
+        message = f"{verb} {topics[0]}"
+    elif len(topics) == 2:
+        message = f"{verb} {topics[0]} and {topics[1]}"
+    else:
+        message = f"{verb} {topics[0]}, {topics[1]}, and {topics[2]}"
+    first_commit = added_any and has_commits is False
+    if first_commit and message.startswith("Add "):
+        message = message.replace("Add ", "Add initial ", 1)
+    return Suggestion(add_args=paths, commit_type=commit_type, commit_message=message)

git_explain/run.py ADDED Viewed

@@ -0,0 +1,54 @@
+"""Apply git add and commit from suggested message."""
+import subprocess
+from pathlib import Path
+def _has_staged_changes(repo_root: Path) -> bool:
+    """Report whether the index holds any change, based on ``git status --porcelain``.
+    Each porcelain line starts with two status columns "XY"; a first column other
+    than a space or "?" (untracked) means the entry has a staged change. Works for
+    the initial commit (unborn HEAD) as well.
+    """
+    status = subprocess.run(
+        ["git", "status", "--porcelain"],
+        check=False,
+        cwd=repo_root,
+        capture_output=True,
+        text=True,
+    )
+    entries = (status.stdout or "").splitlines()
+    return any(len(entry) >= 2 and entry[0] not in (" ", "?") for entry in entries)
+def apply_commands(
+    repo_root: str | Path,
+    add_args: list[str],
+    commit_type: str,
+    commit_message: str,
+) -> None:
+    """Stage the given paths and create the commit "[<commit_type>] <commit_message>".
+    Staging uses ``git add -A -- <paths...>`` so deletions and renames are picked up.
+    Raises RuntimeError when nothing is staged afterwards, and
+    subprocess.CalledProcessError when a git command exits with a non-zero status.
+    """
+    root = Path(repo_root)
+    git_options = {"check": True, "cwd": root, "capture_output": True, "text": True}
+    if add_args:
+        subprocess.run(["git", "add", "-A", "--"] + add_args, **git_options)
+    if not _has_staged_changes(root):
+        raise RuntimeError("Nothing staged after git add; aborting commit.")
+    subject = f"[{commit_type}] {commit_message}"
+    subprocess.run(["git", "commit", "-m", subject], **git_options)