PyPI - gitsumm - Versions diffs - 1.0.0__py3-none-any.whl - Mend

gitsumm 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (10) hide show

gitsumm/__init__.py +3 -0
gitsumm/ai.py +283 -0
gitsumm/git_utils.py +551 -0
gitsumm/main.py +457 -0
gitsumm-1.0.0.dist-info/METADATA +209 -0
gitsumm-1.0.0.dist-info/RECORD +10 -0
gitsumm-1.0.0.dist-info/WHEEL +5 -0
gitsumm-1.0.0.dist-info/entry_points.txt +2 -0
gitsumm-1.0.0.dist-info/licenses/LICENSE +21 -0
gitsumm-1.0.0.dist-info/top_level.txt +1 -0

gitsumm/__init__.py ADDED Viewed

@@ -0,0 +1,3 @@
+"""gitsumm — summarize a git repository's recent activity in plain English."""
+__version__ = "1.0.0"

gitsumm/ai.py ADDED Viewed

@@ -0,0 +1,283 @@
+"""Optional AI summary via the Anthropic SDK.
+This is the **only** module that touches the ``anthropic`` SDK. Importing this
+module must never fail when the dependency or API key is absent: the SDK is
+imported lazily inside the function that needs it, and every failure path falls
+back to a deterministic templated paragraph.
+``generate_paragraph`` therefore *always* returns usable prose — the caller can
+print ``result.text`` unconditionally and only needs ``result.hint`` to tell
+the user why AI was skipped.
+"""
+from __future__ import annotations
+import os
+import re
+from dataclasses import dataclass
+from typing import List, Optional
+from gitsumm.git_utils import Commit, Summary
+# A small, fast model is plenty for a few-sentence digest. Overridable via the
+# GITSUMM_AI_MODEL environment variable (an explicit ``model`` argument wins
+# over both), so users can opt into a larger model without a new CLI flag.
+DEFAULT_MODEL = "claude-haiku-4-5-20251001"
+@dataclass
+class AIResult:
+    """The outcome of an AI summary attempt.
+    ``text`` is always populated (real AI output or the templated fallback).
+    ``used_ai`` says which one. ``hint`` is a short, user-facing note explaining
+    a fallback, or None when AI succeeded.
+    """
+    # Prose to show the user: the model's reply or the templated fallback.
+    text: str
+    # True only when ``text`` came from the model.
+    used_ai: bool
+    # Why the fallback was served; left as None on the success path.
+    hint: Optional[str] = None
+def _plural(n: int, word: str) -> str:
+    return f"{n} {word}" + ("" if n == 1 else "s")
+def templated_paragraph(summary: Summary, repo: str) -> str:
+    """Deterministic prose digest — the offline fallback, no network needed."""
+    parts = [
+        f"In the last {_plural(summary.days, 'day')}, {repo} saw "
+        f"{_plural(summary.commit_count, 'commit')} from "
+        f"{_plural(summary.contributor_count, 'contributor')}."
+    ]
+    top = summary.top_author
+    if top:
+        name, count = top
+        parts.append(f"{name} was most active with {_plural(count, 'commit')}.")
+    busiest = summary.busiest_day
+    if busiest:
+        day, _ = busiest
+        parts.append(f"Activity peaked on {day}.")
+    momentum = summary.momentum
+    if momentum is not None:
+        _, delta = momentum
+        if delta > 0:
+            parts.append(f"That's up {delta} on the previous window.")
+        elif delta < 0:
+            parts.append(f"That's down {abs(delta)} on the previous window.")
+        else:
+            parts.append("That's flat versus the previous window.")
+    if summary.bus_factor_files:
+        n = len(summary.bus_factor_files)
+        parts.append(
+            f"Worth noting: {_plural(n, 'file')} were touched by a single "
+            "author, a bus-factor risk."
+        )
+    return " ".join(parts)
+def _build_prompt(summary: Summary, repo: str) -> str:
+    """Turn the structured summary into a compact prompt of plain facts."""
+    authors = ", ".join(
+        f"{name} ({_plural(c, 'commit')})"
+        for name, c in summary.authors.most_common()
+    )
+    lines = [
+        f"Repository: {repo}",
+        f"Window: last {_plural(summary.days, 'day')}",
+        f"Commits: {summary.commit_count}",
+        f"Contributors: {authors or 'none'}",
+        f"Files touched: {summary.files_changed}",
+    ]
+    if summary.busiest_day:
+        day, c = summary.busiest_day
+        lines.append(f"Busiest day: {day} ({_plural(c, 'commit')})")
+    if summary.momentum is not None:
+        prev, delta = summary.momentum
+        lines.append(
+            f"Previous window commits: {prev} (change: {delta:+d})"
+        )
+    if summary.bus_factor_files:
+        files = ", ".join(
+            f"{f} (only {a})" for f, a in summary.bus_factor_files[:5]
+        )
+        lines.append(f"Single-author files (bus-factor risk): {files}")
+    facts = "\n".join(lines)
+    return (
+        "You are writing a short activity digest of a git repository for a "
+        "team standup or changelog. Using only the facts below, write 2-4 "
+        "fluent sentences in plain English. Be specific and useful; do not "
+        "invent details or add a preamble. If there is a bus-factor risk, "
+        "mention it briefly.\n\n"
+        f"{facts}"
+    )
+def _import_anthropic():
+    """Import the anthropic SDK lazily; return the module or None if absent."""
+    try:
+        import anthropic  # noqa: WPS433 (intentional local import)
+    except ImportError:
+        return None
+    return anthropic
+def _ai_or_fallback(
+    fallback: str, prompt: str, max_tokens: int, model: Optional[str]
+) -> AIResult:
+    """Call the model with ``prompt``, or return ``fallback`` with a hint.
+    The single crash-proof path shared by every AI feature: SDK missing, no API
+    key, or a request failure each yield the templated ``fallback`` and a short
+    ``hint`` — this never raises.
+    """
+    anthropic = _import_anthropic()
+    if anthropic is None:
+        return AIResult(
+            fallback,
+            used_ai=False,
+            hint="No anthropic SDK — serving the offline summary instead. "
+            "Want the AI? pip install 'gitsumm[ai]'",
+        )
+    if not os.environ.get("ANTHROPIC_API_KEY"):
+        return AIResult(
+            fallback,
+            used_ai=False,
+            hint="No ANTHROPIC_API_KEY — serving the offline summary instead.",
+        )
+    try:
+        client = anthropic.Anthropic()
+        message = client.messages.create(
+            model=model or os.environ.get("GITSUMM_AI_MODEL", DEFAULT_MODEL),
+            max_tokens=max_tokens,
+            messages=[{"role": "user", "content": prompt}],
+        )
+        text = "".join(
+            block.text for block in message.content if block.type == "text"
+        ).strip()
+        if not text:
+            raise ValueError("empty response from the API")
+        return AIResult(text, used_ai=True)
+    except Exception as exc:  # never let an AI hiccup crash the tool
+        return AIResult(
+            fallback,
+            used_ai=False,
+            hint=f"AI took a rain check ({exc}) — serving the offline summary "
+            "instead.",
+        )
+def generate_paragraph(
+    summary: Summary, repo: str, model: Optional[str] = None
+) -> AIResult:
+    """Return a fluent paragraph for the summary.
+    Tries the Anthropic API; on any obstacle it returns the templated paragraph
+    with a short ``hint`` and never raises.
+    """
+    return _ai_or_fallback(
+        fallback=templated_paragraph(summary, repo),
+        prompt=_build_prompt(summary, repo),
+        max_tokens=300,
+        model=model,
+    )
+# --- Themed changelog -------------------------------------------------------
+# Section headings, in the order the offline changelog emits them.
+CHANGELOG_CATEGORIES = ("Features", "Fixes", "Refactors", "Other")
+# Lower-case keywords used to bucket a commit subject when the AI is
+# unavailable. The subject's first word is checked first, then any word in it.
+_FIX_WORDS = {
+    "fix", "fixes", "fixed", "bug", "bugfix", "hotfix", "patch", "patches",
+    "patched", "resolve", "resolves", "resolved", "correct", "corrects",
+    "corrected", "revert",
+}
+_FEATURE_WORDS = {
+    "add", "adds", "added", "implement", "implements", "implemented",
+    "introduce", "introduces", "support", "supports", "create", "creates",
+    "created", "enable", "enables", "new", "feature",
+}
+_REFACTOR_WORDS = {
+    "refactor", "refactors", "refactored", "rename", "renames", "renamed",
+    "cleanup", "simplify", "simplifies", "simplified", "restructure", "move",
+    "moves", "moved", "extract", "extracts", "extracted", "tidy", "reformat",
+}
+def _classify(subject: str) -> str:
+    """Bucket a commit subject into one of :data:`CHANGELOG_CATEGORIES`."""
+    words = re.findall(r"[a-z]+", subject.lower())
+    if not words:
+        return "Other"
+    first = words[0]
+    # The leading verb is the strongest signal; check Fixes first so a subject
+    # like "Fix the add-user flow" lands in Fixes, not Features.
+    if first in _FIX_WORDS:
+        return "Fixes"
+    if first in _FEATURE_WORDS:
+        return "Features"
+    if first in _REFACTOR_WORDS:
+        return "Refactors"
+    # Fall back to any keyword anywhere in the subject (whole words only).
+    present = set(words)
+    if present & _FIX_WORDS:
+        return "Fixes"
+    if present & _FEATURE_WORDS:
+        return "Features"
+    if present & _REFACTOR_WORDS:
+        return "Refactors"
+    return "Other"
+def templated_changelog(commits: List[Commit]) -> str:
+    """Deterministic grouped changelog — the offline fallback, no network."""
+    groups: dict = {cat: [] for cat in CHANGELOG_CATEGORIES}
+    for c in commits:
+        groups[_classify(c.subject)].append(c.subject)
+    sections = []
+    for category in CHANGELOG_CATEGORIES:
+        items = groups[category]
+        if not items:
+            continue
+        bullets = "\n".join(f"  • {subject}" for subject in items)
+        sections.append(f"[bold]{category}[/]\n{bullets}")
+    return "\n\n".join(sections) if sections else "No commits to summarize."
+def _changelog_prompt(commits: List[Commit]) -> str:
+    subjects = "\n".join(f"- {c.subject}" for c in commits)
+    return (
+        "You are writing release notes from a list of git commit subjects. "
+        "Group them under these headings, in this order: Features, Fixes, "
+        "Refactors, Other. Omit any heading that has no items. Under each "
+        "heading write one concise past-tense bullet per change in plain "
+        "English — rewrite terse subjects into readable notes and merge obvious "
+        "duplicates. Do not invent anything not present and add no preamble.\n\n"
+        f"Commits:\n{subjects}"
+    )
+def themed_changelog(
+    commits: List[Commit], model: Optional[str] = None
+) -> AIResult:
+    """Return a changelog grouping commits into Features/Fixes/Refactors/Other.
+    Tries the Anthropic API for fluent notes; falls back to deterministic
+    keyword grouping (with a ``hint``) and never raises.
+    """
+    return _ai_or_fallback(
+        fallback=templated_changelog(commits),
+        prompt=_changelog_prompt(commits),
+        max_tokens=800,
+        model=model,
+    )