gitsumm 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
gitsumm/__init__.py ADDED
@@ -0,0 +1,3 @@
1
+ """gitsumm — summarize a git repository's recent activity in plain English."""
2
+
3
# Version string of the gitsumm package.
__version__ = "1.0.0"
gitsumm/ai.py ADDED
@@ -0,0 +1,283 @@
1
+ """Optional AI summary via the Anthropic SDK.
2
+
3
+ This is the **only** module that touches the ``anthropic`` SDK. Importing this
4
+ module must never fail when the dependency or API key is absent: the SDK is
5
+ imported lazily inside the function that needs it, and every failure path falls
6
+ back to a deterministic templated paragraph.
7
+
8
+ ``generate_paragraph`` therefore *always* returns usable prose — the caller can
9
+ print ``result.text`` unconditionally and only needs ``result.hint`` to tell
10
+ the user why AI was skipped.
11
+ """
12
+
13
+ from __future__ import annotations
14
+
15
+ import os
16
+ import re
17
+ from dataclasses import dataclass
18
+ from typing import List, Optional
19
+
20
+ from gitsumm.git_utils import Commit, Summary
21
+
22
# A small, fast model is plenty for a few-sentence digest. This is only the
# default: an explicit ``model`` argument or the GITSUMM_AI_MODEL environment
# variable takes precedence, so users can opt into a larger model without a
# new CLI flag.
DEFAULT_MODEL = "claude-haiku-4-5-20251001"
25
+
26
+
27
@dataclass
class AIResult:
    """Result of one AI-generation attempt, whether it succeeded or not.

    Callers can always display ``text``; the other two fields only describe
    where that text came from.
    """

    # Prose to show the user: model output or the templated fallback.
    text: str
    # True when ``text`` was produced by the model, False for the fallback.
    used_ai: bool
    # Short user-facing reason for a fallback; None when the AI call worked.
    hint: Optional[str] = None
39
+
40
+
41
+ def _plural(n: int, word: str) -> str:
42
+ return f"{n} {word}" + ("" if n == 1 else "s")
43
+
44
+
45
def templated_paragraph(summary: Summary, repo: str) -> str:
    """Build the deterministic prose digest used when AI is unavailable.

    No network or SDK is involved; the text is assembled purely from
    ``summary``.

    Args:
        summary: Source of the facts. Reads ``days``, ``commit_count``,
            ``contributor_count``, ``top_author`` (falsy or a
            ``(name, count)`` pair), ``busiest_day`` (falsy or a pair whose
            first item is the day), ``momentum`` (``None`` or a pair whose
            second item is the commit delta) and ``bus_factor_files``.
        repo: Repository name as it should appear in the opening sentence.

    Returns:
        One paragraph of space-joined sentences. The opening sentence is
        always present; each later sentence appears only when the matching
        fact is available.
    """
    parts = [
        f"In the last {_plural(summary.days, 'day')}, {repo} saw "
        f"{_plural(summary.commit_count, 'commit')} from "
        f"{_plural(summary.contributor_count, 'contributor')}."
    ]

    top = summary.top_author
    if top:
        name, count = top
        parts.append(f"{name} was most active with {_plural(count, 'commit')}.")

    busiest = summary.busiest_day
    if busiest:
        day, _ = busiest
        parts.append(f"Activity peaked on {day}.")

    momentum = summary.momentum
    if momentum is not None:
        _, delta = momentum
        if delta > 0:
            parts.append(f"That's up {delta} on the previous window.")
        elif delta < 0:
            parts.append(f"That's down {abs(delta)} on the previous window.")
        else:
            parts.append("That's flat versus the previous window.")

    if summary.bus_factor_files:
        n = len(summary.bus_factor_files)
        # Keep the verb in agreement with the count: "1 file was" rather
        # than "1 file were".
        verb = "was" if n == 1 else "were"
        parts.append(
            f"Worth noting: {_plural(n, 'file')} {verb} touched by a single "
            "author, a bus-factor risk."
        )

    return " ".join(parts)
81
+
82
+
83
def _build_prompt(summary: Summary, repo: str) -> str:
    """Render the summary as an instruction block followed by plain facts.

    The facts are one ``Label: value`` line each. The busiest-day, momentum
    and bus-factor lines are included only when the summary provides them,
    and at most five single-author files are listed.

    Args:
        summary: Structured activity summary to describe.
        repo: Repository name placed on the ``Repository:`` line.

    Returns:
        The complete prompt text to send to the model.
    """
    contributors = ", ".join(
        f"{author} ({_plural(n, 'commit')})"
        for author, n in summary.authors.most_common()
    )
    facts = [
        f"Repository: {repo}",
        f"Window: last {_plural(summary.days, 'day')}",
        f"Commits: {summary.commit_count}",
        f"Contributors: {contributors or 'none'}",
        f"Files touched: {summary.files_changed}",
    ]

    peak = summary.busiest_day
    if peak:
        peak_day, peak_commits = peak
        facts.append(
            f"Busiest day: {peak_day} ({_plural(peak_commits, 'commit')})"
        )

    trend = summary.momentum
    if trend is not None:
        previous, change = trend
        facts.append(
            f"Previous window commits: {previous} (change: {change:+d})"
        )

    risky = summary.bus_factor_files
    if risky:
        # Cap the list so a large repository cannot bloat the prompt.
        listed = ", ".join(
            f"{path} (only {owner})" for path, owner in risky[:5]
        )
        facts.append(f"Single-author files (bus-factor risk): {listed}")

    instructions = (
        "You are writing a short activity digest of a git repository for a "
        "team standup or changelog. Using only the facts below, write 2-4 "
        "fluent sentences in plain English. Be specific and useful; do not "
        "invent details or add a preamble. If there is a bus-factor risk, "
        "mention it briefly.\n\n"
    )
    return instructions + "\n".join(facts)
119
+
120
+
121
+ def _import_anthropic():
122
+ """Import the anthropic SDK lazily; return the module or None if absent."""
123
+ try:
124
+ import anthropic # noqa: WPS433 (intentional local import)
125
+ except ImportError:
126
+ return None
127
+ return anthropic
128
+
129
+
130
def _ai_or_fallback(
    fallback: str, prompt: str, max_tokens: int, model: Optional[str]
) -> AIResult:
    """Call the model with ``prompt``, or return ``fallback`` with a hint.

    The single crash-proof path shared by every AI feature: a missing SDK, a
    missing API key, or a failed request each yield the templated
    ``fallback`` and a short ``hint``. This function never raises.

    Args:
        fallback: Text to return when the AI cannot be used.
        prompt: User message sent to the model.
        max_tokens: Upper bound on the length of the model's reply.
        model: Model name to use. When falsy, the ``GITSUMM_AI_MODEL``
            environment variable is consulted, then ``DEFAULT_MODEL``.

    Returns:
        ``AIResult`` with ``used_ai=True`` and the model's text on success,
        otherwise ``used_ai=False``, the ``fallback`` text and a ``hint``.
    """
    anthropic = _import_anthropic()
    if anthropic is None:
        return AIResult(
            fallback,
            used_ai=False,
            hint="No anthropic SDK — serving the offline summary instead. "
            "Want the AI? pip install 'gitsumm[ai]'",
        )

    if not os.environ.get("ANTHROPIC_API_KEY"):
        return AIResult(
            fallback,
            used_ai=False,
            hint="No ANTHROPIC_API_KEY — serving the offline summary instead.",
        )

    # An exported-but-empty GITSUMM_AI_MODEL counts as unset, the same way an
    # empty ANTHROPIC_API_KEY does above; otherwise an empty model name would
    # be sent and the request would fail for no good reason.
    chosen_model = (
        model or os.environ.get("GITSUMM_AI_MODEL") or DEFAULT_MODEL
    )

    try:
        client = anthropic.Anthropic()
        message = client.messages.create(
            model=chosen_model,
            max_tokens=max_tokens,
            messages=[{"role": "user", "content": prompt}],
        )
        # Only text blocks carry prose; any other block type is skipped.
        text = "".join(
            block.text for block in message.content if block.type == "text"
        ).strip()
        if not text:
            raise ValueError("empty response from the API")
        return AIResult(text, used_ai=True)
    except Exception as exc:  # never let an AI hiccup crash the tool
        # Some exceptions have an empty message; name the class instead so
        # the hint never reads "rain check ()".
        detail = str(exc) or type(exc).__name__
        return AIResult(
            fallback,
            used_ai=False,
            hint=f"AI took a rain check ({detail}) — serving the offline "
            "summary instead.",
        )
175
+
176
+
177
def generate_paragraph(
    summary: Summary, repo: str, model: Optional[str] = None
) -> AIResult:
    """Produce a short prose paragraph describing ``summary``.

    The Anthropic API is tried first. If anything stands in the way, the
    templated paragraph is returned together with a short ``hint``; no
    exception escapes.

    Args:
        summary: Structured activity summary to describe.
        repo: Repository name used in the text.
        model: Optional model name override.

    Returns:
        ``AIResult`` whose ``text`` is always usable prose.
    """
    offline_text = templated_paragraph(summary, repo)
    prompt_text = _build_prompt(summary, repo)
    return _ai_or_fallback(
        fallback=offline_text,
        prompt=prompt_text,
        max_tokens=300,
        model=model,
    )
191
+
192
+
193
+ # --- Themed changelog -------------------------------------------------------
194
+
195
+ CHANGELOG_CATEGORIES = ("Features", "Fixes", "Refactors", "Other")
196
+
197
+ # Leading-verb keywords used to bucket a commit when the AI is unavailable.
198
+ _FIX_WORDS = {
199
+ "fix", "fixes", "fixed", "bug", "bugfix", "hotfix", "patch", "patches",
200
+ "patched", "resolve", "resolves", "resolved", "correct", "corrects",
201
+ "corrected", "revert",
202
+ }
203
+ _FEATURE_WORDS = {
204
+ "add", "adds", "added", "implement", "implements", "implemented",
205
+ "introduce", "introduces", "support", "supports", "create", "creates",
206
+ "created", "enable", "enables", "new", "feature",
207
+ }
208
+ _REFACTOR_WORDS = {
209
+ "refactor", "refactors", "refactored", "rename", "renames", "renamed",
210
+ "cleanup", "simplify", "simplifies", "simplified", "restructure", "move",
211
+ "moves", "moved", "extract", "extracts", "extracted", "tidy", "reformat",
212
+ }
213
+
214
+
215
def _classify(subject: str) -> str:
    """Pick the :data:`CHANGELOG_CATEGORIES` entry for a commit subject.

    The subject is lower-cased and split into runs of ASCII letters. The
    first word decides the category when it is a known keyword; otherwise
    any keyword anywhere in the subject does. Subjects without a keyword,
    or without any letters at all, are "Other".
    """
    tokens = re.findall(r"[a-z]+", subject.lower())
    if not tokens:
        return "Other"

    # Order is the tie-break: Fixes comes first so a subject like
    # "Fix the add-user flow" lands in Fixes, not Features.
    ranked = (
        ("Fixes", _FIX_WORDS),
        ("Features", _FEATURE_WORDS),
        ("Refactors", _REFACTOR_WORDS),
    )

    # The leading verb is the strongest signal.
    leading = tokens[0]
    for category, keywords in ranked:
        if leading in keywords:
            return category

    # Otherwise accept a keyword anywhere in the subject (whole words only).
    seen = set(tokens)
    for category, keywords in ranked:
        if not seen.isdisjoint(keywords):
            return category

    return "Other"
238
+
239
+
240
def templated_changelog(commits: List[Commit]) -> str:
    """Group commit subjects under category headings without any network.

    Each commit's ``subject`` is classified with ``_classify`` and listed as
    a bullet under its heading. Headings follow the order of
    ``CHANGELOG_CATEGORIES`` and empty ones are left out.

    Args:
        commits: Commits to list; only ``subject`` is read.

    Returns:
        The sections separated by blank lines, or
        "No commits to summarize." when there is nothing to show.
    """
    # NOTE(review): the "[bold]…[/]" heading looks like console markup, and
    # subjects are inserted verbatim. Confirm how the caller renders this in
    # case a subject containing "[" needs escaping.
    buckets: dict = {}
    for category in CHANGELOG_CATEGORIES:
        buckets[category] = []
    for commit in commits:
        buckets[_classify(commit.subject)].append(commit.subject)

    rendered = [
        f"[bold]{category}[/]\n"
        + "\n".join(f" • {entry}" for entry in buckets[category])
        for category in CHANGELOG_CATEGORIES
        if buckets[category]
    ]

    if not rendered:
        return "No commits to summarize."
    return "\n\n".join(rendered)
255
+
256
+
257
+ def _changelog_prompt(commits: List[Commit]) -> str:
258
+ subjects = "\n".join(f"- {c.subject}" for c in commits)
259
+ return (
260
+ "You are writing release notes from a list of git commit subjects. "
261
+ "Group them under these headings, in this order: Features, Fixes, "
262
+ "Refactors, Other. Omit any heading that has no items. Under each "
263
+ "heading write one concise past-tense bullet per change in plain "
264
+ "English — rewrite terse subjects into readable notes and merge obvious "
265
+ "duplicates. Do not invent anything not present and add no preamble.\n\n"
266
+ f"Commits:\n{subjects}"
267
+ )
268
+
269
+
270
def themed_changelog(
    commits: List[Commit], model: Optional[str] = None
) -> AIResult:
    """Produce a changelog grouped into Features/Fixes/Refactors/Other.

    The Anthropic API is tried first for fluent notes. If anything stands in
    the way, the deterministic keyword grouping is returned together with a
    ``hint``; no exception escapes.

    Args:
        commits: Commits to summarize.
        model: Optional model name override.

    Returns:
        ``AIResult`` whose ``text`` is always a usable changelog.
    """
    offline_text = templated_changelog(commits)
    prompt_text = _changelog_prompt(commits)
    return _ai_or_fallback(
        fallback=offline_text,
        prompt=prompt_text,
        max_tokens=800,
        model=model,
    )