lgit-cli 3.7.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- lgit/__init__.py +75 -0
- lgit/__main__.py +8 -0
- lgit/analysis.py +326 -0
- lgit/api.py +1077 -0
- lgit/cache.py +338 -0
- lgit/changelog.py +523 -0
- lgit/cli.py +1104 -0
- lgit/compose.py +2110 -0
- lgit/config.py +437 -0
- lgit/diffing.py +384 -0
- lgit/errors.py +137 -0
- lgit/git.py +852 -0
- lgit/map_reduce.py +508 -0
- lgit/markdown_output.py +709 -0
- lgit/models.py +924 -0
- lgit/normalization.py +411 -0
- lgit/patch.py +784 -0
- lgit/profile.py +426 -0
- lgit/py.typed +0 -0
- lgit/repo.py +287 -0
- lgit/resources/__init__.py +1 -0
- lgit/resources/commit_types.json +242 -0
- lgit/resources/prompts/analysis/default.md +237 -0
- lgit/resources/prompts/analysis/markdown.md +112 -0
- lgit/resources/prompts/changelog/default.md +89 -0
- lgit/resources/prompts/changelog/markdown.md +60 -0
- lgit/resources/prompts/compose-bind/default.md +40 -0
- lgit/resources/prompts/compose-bind/markdown.md +41 -0
- lgit/resources/prompts/compose-intent/default.md +63 -0
- lgit/resources/prompts/compose-intent/markdown.md +59 -0
- lgit/resources/prompts/fast/default.md +46 -0
- lgit/resources/prompts/fast/markdown.md +51 -0
- lgit/resources/prompts/map/default.md +67 -0
- lgit/resources/prompts/map/markdown.md +63 -0
- lgit/resources/prompts/reduce/default.md +81 -0
- lgit/resources/prompts/reduce/markdown.md +68 -0
- lgit/resources/prompts/summary/default.md +74 -0
- lgit/resources/prompts/summary/markdown.md +77 -0
- lgit/resources/validation_data.json +1 -0
- lgit/rewrite.py +392 -0
- lgit/style.py +295 -0
- lgit/templates.py +385 -0
- lgit/testing/__init__.py +62 -0
- lgit/testing/compare.py +57 -0
- lgit/testing/fixture.py +386 -0
- lgit/testing/report.py +201 -0
- lgit/testing/runner.py +256 -0
- lgit/tokens.py +90 -0
- lgit/validation.py +545 -0
- lgit_cli-3.7.0.dist-info/METADATA +288 -0
- lgit_cli-3.7.0.dist-info/RECORD +54 -0
- lgit_cli-3.7.0.dist-info/WHEEL +4 -0
- lgit_cli-3.7.0.dist-info/entry_points.txt +2 -0
- lgit_cli-3.7.0.dist-info/licenses/LICENSE +21 -0
lgit/markdown_output.py
ADDED
|
@@ -0,0 +1,709 @@
|
|
|
1
|
+
"""Lenient markdown and text parsers for model commit outputs."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import json
|
|
6
|
+
import re
|
|
7
|
+
from collections.abc import Iterable, Mapping
|
|
8
|
+
from typing import Any
|
|
9
|
+
|
|
10
|
+
from .errors import InvalidCommitType
|
|
11
|
+
from .models import (
|
|
12
|
+
AnalysisDetail,
|
|
13
|
+
ChangelogCategory,
|
|
14
|
+
CommitType,
|
|
15
|
+
ConventionalAnalysis,
|
|
16
|
+
ConventionalCommit,
|
|
17
|
+
coerce_commit_type,
|
|
18
|
+
)
|
|
19
|
+
|
|
20
|
+
_PREFIX_RE = re.compile(
|
|
21
|
+
r"^\s*(?:#+\s*)?(?P<type>[a-z][a-z0-9-]*)(?:\((?P<scope>[^)]+)\))?!?\s*:\s*(?P<summary>.+?)\s*$",
|
|
22
|
+
re.IGNORECASE,
|
|
23
|
+
)
|
|
24
|
+
_SUMMARY_TAG_RE = re.compile(r"<summary\b[^>]*>\s*(.*?)(?:\s*</[^>]+>|$)", re.IGNORECASE | re.DOTALL)
|
|
25
|
+
_ISSUE_RE = re.compile(r"#\d+(?:\s*-\s*#?\d+)?")
|
|
26
|
+
_CATEGORY_RE = re.compile(
|
|
27
|
+
r"^\s*(?:\[(?P<bracket>[^\]]+)\]|(?P<prefix>Added|Changed|Fixed|Deprecated|Removed|Security|Breaking Changes)\s*:)\s*(?P<text>.*)$",
|
|
28
|
+
re.IGNORECASE,
|
|
29
|
+
)
|
|
30
|
+
_SUMMARY_VERBS = {
|
|
31
|
+
"feat": "added",
|
|
32
|
+
"fix": "fixed",
|
|
33
|
+
"refactor": "restructured",
|
|
34
|
+
"docs": "documented",
|
|
35
|
+
"test": "tested",
|
|
36
|
+
"perf": "optimized",
|
|
37
|
+
"build": "updated",
|
|
38
|
+
"ci": "updated",
|
|
39
|
+
"chore": "updated",
|
|
40
|
+
"style": "formatted",
|
|
41
|
+
"revert": "reverted",
|
|
42
|
+
}
|
|
43
|
+
_SUMMARY_SAFE_DEFAULTS = {
|
|
44
|
+
"refactor": "restructured change",
|
|
45
|
+
"feat": "added functionality",
|
|
46
|
+
"fix": "fixed issue",
|
|
47
|
+
"docs": "documented updates",
|
|
48
|
+
"test": "tested changes",
|
|
49
|
+
"chore": "updated tooling",
|
|
50
|
+
"build": "updated tooling",
|
|
51
|
+
"ci": "updated tooling",
|
|
52
|
+
"style": "updated tooling",
|
|
53
|
+
"perf": "optimized performance",
|
|
54
|
+
"revert": "reverted previous commit",
|
|
55
|
+
}
|
|
56
|
+
|
|
57
|
+
|
|
58
|
+
def strip_type_prefix(text: str) -> str:
    """Remove a conventional-commit prefix from ``text`` when present.

    Only the first line of the cleaned text is considered. When that line
    matches ``type(scope)!: summary`` the summary part is kept, otherwise the
    whole line is. Wrapping quotes and a trailing period are then removed.
    Blank input, or input that is empty once cleaned (for example an empty
    fenced block), yields an empty string.
    """

    # Cleaning can reduce non-blank input to nothing, so guard the index
    # instead of assuming there is always a first line.
    lines = _clean_markdown_text(text).splitlines() if text.strip() else []
    first_line = lines[0].strip() if lines else ""
    match = _PREFIX_RE.match(first_line)
    summary = match.group("summary") if match else first_line
    return _strip_trailing_period(_strip_wrapping_quotes(summary.strip()))
|
|
65
|
+
|
|
66
|
+
|
|
67
|
+
def fallback_summary(
    stat: str = "", details: Iterable[str] = (), diff: str = "", *, limit: int = 72, commit_type: str = "chore"
) -> str:
    """Build a deterministic summary for when model output cannot be parsed.

    The first detail that is non-empty after its bullet marker and type prefix
    are removed is used. Without one, the summary is ``Updated <subject>``,
    where the subject comes from the first non-blank line of ``stat`` and then
    ``diff``. The result is capped at ``min(limit, 50)`` characters (at least
    1) and never starts with the commit type itself.
    """

    kind = _normalize_commit_type(commit_type) or "chore"
    summary = ""
    prepend_verb = False
    for entry in details:
        summary = strip_type_prefix(str(entry).lstrip("-*•–+ ").strip())
        if not summary:
            continue
        # A detail that opened with the type word ("fix ...", "fixed ...") has
        # lost its verb, so remember to put one back.
        summary, prepend_verb = _strip_leading_type_word(summary, kind)
        break

    if not summary:
        subject = _primary_stat_subject(stat) or _primary_stat_subject(diff)
        if subject is None or subject.lower() == "files":
            summary = "Updated files"
        else:
            summary = f"Updated {subject}"

    # Collapse every whitespace run (newlines included) to a single space.
    summary = " ".join(summary.split()).rstrip(".;:").strip()
    if not summary:
        summary = "Updated files"
    if prepend_verb and not _starts_with_past_tense(summary):
        summary = f"{_summary_verb(kind)} {summary}"

    cap = max(1, min(limit, 50))
    summary = _truncate_summary(summary, cap).rstrip(".")
    leading = summary.split(maxsplit=1)[0] if summary else ""
    if leading.lower() == kind.lower():
        summary = _safe_summary_default(kind)
    return _truncate_summary(summary, cap)
|
|
94
|
+
|
|
95
|
+
|
|
96
|
+
def parse_summary_markdown(text: str) -> str:
    """Extract a one-line summary from JSON, XML-ish tags, markdown, or plain text.

    Blank input returns ``""``. A JSON object is searched for a non-blank
    string under ``summary``, ``title`` or ``message``. Otherwise the content
    of a ``<summary>`` tag (or the whole cleaned text) is stripped of heading
    markers, a leading label and wrapping quotes, then collapsed to one line.

    Raises:
        ValueError: if nothing is left after normalization.
    """

    if not text.strip():
        return ""

    decoded = _try_json(text)
    if isinstance(decoded, Mapping):
        for field in ("summary", "title", "message"):
            found = decoded.get(field)
            if isinstance(found, str) and found.strip():
                return strip_type_prefix(found)

    body = _clean_markdown_text(text)
    inner = _extract_tag_lenient(body, "summary")
    if inner is None:
        inner = body
    normalized = _strip_wrapping_quotes(_strip_label_prefix(_strip_heading_markers(inner)))
    summary = " ".join(normalized.split())
    if summary:
        return strip_type_prefix(summary)
    raise ValueError("markdown summary empty after normalization")
|
|
117
|
+
|
|
118
|
+
|
|
119
|
+
def parse_conventional_analysis_markdown(text: str, *, default_type: str = "chore") -> ConventionalAnalysis:
    """Parse a conventional analysis from markdown, JSON, or plain text.

    A JSON object is handed to ``analysis_from_mapping``. Otherwise the first
    five lines are searched for a ``type(scope): summary`` heading. A line
    whose type is recognised wins; failing that, the first ``#`` heading whose
    type can be coerced is used. Bullets after the heading become details, and
    ``Fixes:`` / ``Closes:`` / ``Resolves:`` lines and ``#123`` mentions become
    issue references. ``default_type`` is only used on the JSON path.

    Raises:
        ValueError: if no heading line is found.
    """

    decoded = _try_json(text)
    if isinstance(decoded, Mapping):
        return analysis_from_mapping(decoded, default_type=default_type)

    lines = _clean_markdown_text(text).splitlines()
    strict: tuple[int, str, str | None, str] | None = None
    lenient: tuple[int, str, str | None, str] | None = None
    for position, raw in enumerate(lines[:5]):
        unmarked = _strip_heading_markers(raw)
        exact = _parse_heading_line(unmarked, coerce=False)
        if exact is not None:
            strict = (position, *exact)
            break
        # Only the first coercible "#" heading is remembered as a fallback.
        if lenient is not None or not raw.strip().startswith("#"):
            continue
        loose = _parse_heading_line(unmarked, coerce=True)
        if loose is not None:
            lenient = (position, *loose)

    chosen = strict if strict is not None else lenient
    if chosen is None:
        raise ValueError("markdown analysis type(scope): summary heading not found")
    start, commit_type, scope, summary = chosen

    detail_texts: list[str] = []
    issue_refs: list[str] = []
    for raw in lines[start + 1 :]:
        content = raw.strip()
        if not content:
            continue
        if content.lower().startswith(("fixes:", "closes:", "resolves:")):
            listed = content.split(":", 1)[1]
            issue_refs.extend(ref.strip() for ref in listed.split(",") if ref.strip())
            continue
        bullet = _strip_bullet(content)
        if bullet:
            detail_texts.append(_ensure_sentence(bullet))
            issue_refs.extend(_ISSUE_RE.findall(bullet))

    return ConventionalAnalysis(
        commit_type=commit_type,
        scope=scope,
        summary=summary,
        details=tuple(AnalysisDetail.simple(entry) for entry in _dedupe(detail_texts)),
        issue_refs=tuple(_dedupe(issue_refs)),
    )
|
|
166
|
+
|
|
167
|
+
|
|
168
|
+
def parse_fast_commit_markdown(text: str, *, default_type: str = "chore") -> ConventionalCommit:
    """Parse a complete conventional commit from markdown or JSON text.

    The text is first turned into an analysis (from a JSON object when one can
    be decoded, otherwise from markdown), which is then passed to
    ``ConventionalCommit.from_raw``. When the analysis has no summary, one is
    derived from its body texts with ``fallback_summary``.
    """

    decoded = _try_json(text)
    analysis = (
        analysis_from_mapping(decoded, default_type=default_type)
        if isinstance(decoded, Mapping)
        else parse_conventional_analysis_markdown(text, default_type=default_type)
    )
    scope = analysis.scope
    return ConventionalCommit.from_raw(
        commit_type=str(analysis.commit_type),
        scope=str(scope) if scope is not None else None,
        summary=analysis.summary or fallback_summary(details=analysis.body_texts()),
        body=analysis.body_texts(),
    )
|
|
182
|
+
|
|
183
|
+
|
|
184
|
+
def analysis_from_mapping(payload: Mapping[str, Any], *, default_type: str = "chore") -> ConventionalAnalysis:
    """Coerce a JSON-like mapping into ``ConventionalAnalysis``.

    Keys read: ``type`` / ``commit_type``, ``scope``, ``summary``,
    ``details`` / ``body`` and ``issue_refs`` / ``issues``. A missing or blank
    type falls back to ``default_type``. A scope of ``""``, ``null``, ``none``
    or ``(none)`` (any case) becomes ``None``, and an empty summary becomes
    ``None``.
    """

    kind = str(payload.get("type") or payload.get("commit_type") or default_type).strip() or default_type

    scope_value = payload.get("scope")
    scope_text = str(scope_value).strip() if scope_value is not None else ""
    scope = scope_text if scope_text.lower() not in {"", "null", "none", "(none)"} else None

    summary = strip_type_prefix(str(payload.get("summary") or "")) or None

    detail_source = payload.get("details") or payload.get("body") or []
    details = tuple(_coerce_detail(entry) for entry in _coerce_iterable(detail_source) if _detail_text(entry))

    ref_source = payload.get("issue_refs") or payload.get("issues") or ()
    issue_refs = tuple(str(ref).strip() for ref in _coerce_iterable(ref_source) if str(ref).strip())

    return ConventionalAnalysis(
        commit_type=kind,
        scope=scope,
        summary=summary,
        details=details,
        issue_refs=issue_refs,
    )
|
|
202
|
+
|
|
203
|
+
|
|
204
|
+
def parse_file_observations_markdown(text: str) -> list[dict[str, Any]]:
    """Parse map-phase file observations from JSON or lenient markdown.

    JSON may be a list of file objects or an object with a ``files`` list. In
    markdown, a non-bullet line that looks like a path (contains ``/`` or
    ``.``) starts a new file, and the bullets that follow are its
    observations. Bullets before the first path line are ignored.
    """

    decoded = _try_json(text)
    if isinstance(decoded, Mapping):
        listed = decoded.get("files", [])
        if isinstance(listed, list):
            return [_coerce_file_observations(entry) for entry in listed if isinstance(entry, Mapping)]
    if isinstance(decoded, list):
        return [_coerce_file_observations(entry) for entry in decoded if isinstance(entry, Mapping)]

    header_re = re.compile(r"^(?:#+\s*)?(?:file\s*[:=-]\s*)?`?([^`]+?)`?\s*:??$", re.IGNORECASE)
    files: list[dict[str, Any]] = []
    current_path: str | None = None
    current_obs: list[str] = []
    for raw in _clean_markdown_text(text).splitlines():
        entry = raw.strip()
        if not entry:
            continue
        item = _strip_bullet(entry)
        if item is not None:
            if current_path is not None:
                current_obs.append(_strip_trailing_period(item))
            continue
        found = header_re.match(entry)
        if found is None:
            continue
        name = found.group(1)
        if "/" not in name and "." not in name:
            continue
        # A new path line closes the file collected so far.
        if current_path is not None:
            files.append({"path": current_path, "observations": current_obs})
        current_path = name.strip()
        current_obs = []

    if current_path is not None:
        files.append({"path": current_path, "observations": current_obs})
    return files
|
|
234
|
+
|
|
235
|
+
|
|
236
|
+
def parse_compose_intent_markdown(text: str) -> dict[str, Any]:
    """Parse markdown compose intent output into a JSON-like mapping.

    Three kinds of lines are recognised, each in its own pass over the text so
    that relation and file lines may refer to a group declared anywhere:

    * ``G1 := type(scope): rationale`` declares a group.
    * ``G1 <- G2, G3`` lists the dependencies of a declared group.
    * ``- G1: F1, F2`` bullets after a ``files:`` line list a group's file ids.

    Returns:
        ``{"groups": [...]}`` where every group has ``group_id``, ``type``,
        ``scope``, ``rationale``, ``file_ids`` and ``dependencies``.

    Raises:
        ValueError: if no group declaration is found.
    """

    lines = [line.strip() for line in _clean_markdown_text(text).splitlines()]
    groups: list[dict[str, Any]] = []
    group_map: dict[str, int] = {}

    # Pass 1: group declarations.
    for line in lines:
        if ":=" not in line:
            continue
        gid, rest = line.split(":=", 1)
        gid = gid.strip()
        rest = rest.strip()
        if not gid or ":" not in rest:
            continue
        type_scope, rationale = rest.split(":", 1)
        commit_type, scope = _parse_compose_type_scope(type_scope.strip())
        group_map[gid] = len(groups)
        groups.append(
            {
                "group_id": gid,
                "type": str(coerce_commit_type(commit_type)),
                "scope": scope,
                "rationale": rationale.strip(),
                "file_ids": [],
                "dependencies": [],
            }
        )

    # Pass 2: dependency lines; unknown group ids are ignored.
    for line in lines:
        if "<-" not in line:
            continue
        gid, deps_text = line.split("<-", 1)
        idx = group_map.get(gid.strip())
        if idx is not None:
            groups[idx]["dependencies"] = [dep.strip() for dep in deps_text.split(",") if dep.strip()]

    # Pass 3: file bullets, only once a "files:" line has been seen.
    in_files_section = False
    for line in lines:
        if line.lower().startswith("files:"):
            in_files_section = True
            continue
        bullet = _bullet_content(line)
        if not in_files_section or bullet is None or ":" not in bullet:
            continue
        gid, files_text = bullet.split(":", 1)
        idx = group_map.get(gid.strip())
        if idx is not None:
            # Drop empty ids ("- G1:" or a trailing comma), as is already done
            # for dependencies, so no blank file id leaks through.
            groups[idx]["file_ids"] = [file_id.strip() for file_id in files_text.split(",") if file_id.strip()]

    if not groups:
        raise ValueError("markdown compose intent: no groups found (format: G1 := type(scope): rationale)")
    return {"groups": groups}
|
|
291
|
+
|
|
292
|
+
|
|
293
|
+
# Turns "# group" heading lines followed by bullet lines into
# {"assignments": [{"group_id": ..., "hunk_ids": [...]}, ...]}.
# Every line starting with "#" opens a new group; its name is the heading text
# without "#" marks and without a trailing ":". Bullet contents are collected
# as hunk ids. A ValueError is raised when no heading line was seen.
# NOTE(review): this rendering has lost indentation, so it is not visible
# whether `current_hunks = []` sits inside the `if current_group is not None:`
# guard. That decides whether bullets seen before the first heading are
# discarded or attached to the first group — confirm against the packaged file
# before restructuring this function.
def parse_compose_binding_markdown(text: str) -> dict[str, Any]:
|
|
294
|
+
"""Parse markdown compose hunk-binding output into a JSON-like mapping."""
|
|
295
|
+
|
|
296
|
+
assignments: list[dict[str, Any]] = []
|
|
297
|
+
current_group: str | None = None
|
|
298
|
+
current_hunks: list[str] = []
|
|
299
|
+
for line in _clean_markdown_text(text).splitlines():
|
|
300
|
+
trimmed_line = line.strip()
|
|
301
|
+
if trimmed_line.startswith("#"):
|
|
302
|
+
if current_group is not None:
|
|
303
|
+
assignments.append({"group_id": current_group, "hunk_ids": current_hunks})
|
|
304
|
+
current_hunks = []
|
|
305
|
+
current_group = trimmed_line.lstrip("#").strip().rstrip(":").strip()
|
|
306
|
+
continue
|
|
307
|
+
hunk_id = _bullet_content(trimmed_line)
|
|
308
|
+
if hunk_id is not None:
|
|
309
|
+
current_hunks.append(hunk_id)
|
|
310
|
+
if current_group is not None:
|
|
311
|
+
assignments.append({"group_id": current_group, "hunk_ids": current_hunks})
|
|
312
|
+
if not assignments:
|
|
313
|
+
raise ValueError("markdown compose binding: no assignments found (format: # group_id\\n- hunk_id)")
|
|
314
|
+
return {"assignments": assignments}
|
|
315
|
+
|
|
316
|
+
|
|
317
|
+
def _parse_compose_type_scope(type_scope: str) -> tuple[str, str | None]:
|
|
318
|
+
if "(" in type_scope:
|
|
319
|
+
p_start = type_scope.find("(")
|
|
320
|
+
p_end = type_scope.find(")", p_start + 1)
|
|
321
|
+
if p_end >= 0:
|
|
322
|
+
commit_type = type_scope[:p_start].strip()
|
|
323
|
+
scope = type_scope[p_start + 1 : p_end].strip()
|
|
324
|
+
return commit_type, scope or None
|
|
325
|
+
return type_scope, None
|
|
326
|
+
|
|
327
|
+
|
|
328
|
+
# Builds an AnalysisDetail from one detail item. The text comes from
# `_detail_text(item)` and is finished with `_ensure_sentence`. For mappings a
# category is read from "changelog_category" / "category" and resolved with
# `ChangelogCategory.from_name`; "user_visible" is read with a default of
# `category is not None`. The other branch strips a category prefix from the
# text with `_strip_category_prefix`.
# NOTE(review): indentation is lost in this rendering. It is not visible
# whether `else:` pairs with `if isinstance(item, Mapping):` or with
# `if raw_category:`, nor whether the `user_visible = bool(...)` line is nested
# under `if raw_category:` — confirm against the packaged file before
# restructuring this function.
def _coerce_detail(item: Any) -> AnalysisDetail:
|
|
329
|
+
text = _detail_text(item)
|
|
330
|
+
category: ChangelogCategory | None = None
|
|
331
|
+
user_visible = False
|
|
332
|
+
if isinstance(item, Mapping):
|
|
333
|
+
raw_category = item.get("changelog_category") or item.get("category")
|
|
334
|
+
if raw_category:
|
|
335
|
+
category = ChangelogCategory.from_name(str(raw_category))
|
|
336
|
+
user_visible = bool(item.get("user_visible", category is not None))
|
|
337
|
+
else:
|
|
338
|
+
text, category = _strip_category_prefix(text)
|
|
339
|
+
user_visible = category is not None
|
|
340
|
+
return AnalysisDetail(text=_ensure_sentence(text), changelog_category=category, user_visible=user_visible)
|
|
341
|
+
|
|
342
|
+
|
|
343
|
+
def _detail_text(item: Any) -> str:
|
|
344
|
+
if isinstance(item, Mapping):
|
|
345
|
+
return str(item.get("text") or item.get("summary") or item.get("detail") or "").strip()
|
|
346
|
+
return str(item).strip()
|
|
347
|
+
|
|
348
|
+
|
|
349
|
+
def _coerce_file_observations(item: Mapping[str, Any]) -> dict[str, Any]:
    """Normalize one file entry to ``{"path": str, "observations": list[str]}``.

    The path is read from ``path`` or ``file`` and the observations from
    ``observations`` or ``details``; a missing value becomes ``""`` or ``[]``.
    """
    raw_observations = item.get("observations") or item.get("details") or []
    location = item.get("path") or item.get("file") or ""
    return {
        "path": str(location),
        "observations": _coerce_observation_strings(raw_observations),
    }
|
|
352
|
+
|
|
353
|
+
|
|
354
|
+
def _coerce_observation_strings(value: Any) -> list[str]:
|
|
355
|
+
if isinstance(value, str):
|
|
356
|
+
stripped = value.strip()
|
|
357
|
+
if stripped.startswith("["):
|
|
358
|
+
try:
|
|
359
|
+
decoded = json.loads(stripped)
|
|
360
|
+
if isinstance(decoded, list):
|
|
361
|
+
return [str(item).strip() for item in decoded if str(item).strip()]
|
|
362
|
+
except json.JSONDecodeError:
|
|
363
|
+
pass
|
|
364
|
+
return [line.lstrip("-*• ").strip() for line in stripped.splitlines() if line.lstrip("-*• ").strip()]
|
|
365
|
+
return [str(item).strip() for item in _coerce_iterable(value) if str(item).strip()]
|
|
366
|
+
|
|
367
|
+
|
|
368
|
+
def _coerce_iterable(value: Any) -> Iterable[Any]:
|
|
369
|
+
if value is None:
|
|
370
|
+
return ()
|
|
371
|
+
if isinstance(value, str):
|
|
372
|
+
return (value,) if value.strip() else ()
|
|
373
|
+
if isinstance(value, Iterable):
|
|
374
|
+
return value
|
|
375
|
+
return (value,)
|
|
376
|
+
|
|
377
|
+
|
|
378
|
+
def _normalize_commit_type(raw: str) -> str | None:
    """Return ``str(CommitType.from_raw(raw))``, or ``None`` when that raises ``InvalidCommitType``."""
    try:
        normalized: str | None = str(CommitType.from_raw(raw))
    except InvalidCommitType:
        normalized = None
    return normalized
|
|
383
|
+
|
|
384
|
+
|
|
385
|
+
def _summary_verb(commit_type: str) -> str:
    """Look up the past-tense verb for ``commit_type``; unknown types give ``"changed"``."""
    try:
        return _SUMMARY_VERBS[commit_type]
    except KeyError:
        return "changed"
|
|
387
|
+
|
|
388
|
+
|
|
389
|
+
def _safe_summary_default(commit_type: str) -> str:
    """Look up the stock summary for ``commit_type``; unknown types give ``"updated files"``."""
    try:
        return _SUMMARY_SAFE_DEFAULTS[commit_type]
    except KeyError:
        return "updated files"
|
|
391
|
+
|
|
392
|
+
|
|
393
|
+
def _strip_leading_type_word(text: str, commit_type: str) -> tuple[str, bool]:
|
|
394
|
+
cleaned = text.strip().rstrip(".")
|
|
395
|
+
variants = {commit_type, f"{commit_type}ed", f"{commit_type}d"}
|
|
396
|
+
for variant in sorted(variants, key=len, reverse=True):
|
|
397
|
+
prefix = f"{variant.lower()} "
|
|
398
|
+
if cleaned.lower().startswith(prefix):
|
|
399
|
+
return cleaned[len(variant) :].strip(), True
|
|
400
|
+
return cleaned, False
|
|
401
|
+
|
|
402
|
+
|
|
403
|
+
def _starts_with_past_tense(text: str) -> bool:
|
|
404
|
+
words = text.split()
|
|
405
|
+
first = words[0].lower() if words else ""
|
|
406
|
+
return first.endswith("ed") or first in {
|
|
407
|
+
"built",
|
|
408
|
+
"changed",
|
|
409
|
+
"documented",
|
|
410
|
+
"fixed",
|
|
411
|
+
"optimized",
|
|
412
|
+
"restructured",
|
|
413
|
+
"updated",
|
|
414
|
+
}
|
|
415
|
+
|
|
416
|
+
|
|
417
|
+
def _primary_stat_subject(text: str) -> str | None:
|
|
418
|
+
for line in text.splitlines():
|
|
419
|
+
stripped = line.strip()
|
|
420
|
+
if not stripped:
|
|
421
|
+
continue
|
|
422
|
+
subject = stripped.split("|", 1)[0].strip()
|
|
423
|
+
return subject or "files"
|
|
424
|
+
return None
|
|
425
|
+
|
|
426
|
+
|
|
427
|
+
def _try_json(text: str) -> Any:
    """Decode ``text`` as JSON when it looks like an object or array.

    The body of the first fenced block in the raw text is tried first, then
    the cleaned text as a whole. Candidates that do not start with ``[`` or
    ``{`` are skipped. Returns ``None`` when nothing decodes.
    """
    attempts: list[str] = []
    block = re.search(r"```(?:json)?\s*(.*?)```", text, re.IGNORECASE | re.DOTALL)
    if block:
        attempts.append(block.group(1).strip())
    attempts.append(_clean_markdown_text(text).strip())

    for attempt in attempts:
        if attempt[:1] not in ("[", "{"):
            continue
        try:
            return json.loads(attempt)
        except json.JSONDecodeError:
            continue
    return None
|
|
441
|
+
|
|
442
|
+
|
|
443
|
+
def _clean_markdown_text(text: str) -> str:
    """Remove code-fence wrapping from model output.

    Text that opens with a fence keeps only what lies between the end of the
    opening fence line and the last closing fence. Text that does not open
    with a fence has every fence line removed. ``\\r\\n`` line endings are
    converted to ``\\n``.
    """
    # NOTE(review): the rendering this was read from has no indentation; the
    # original `else` is taken to pair with the outer "starts with a fence"
    # test rather than the inner newline test — confirm.
    result = _normalize_escaped_whitespace(text.strip())
    if not result.startswith("```"):
        kept = [row for row in result.splitlines() if not row.lstrip().startswith("```")]
        result = "\n".join(kept).strip()
    else:
        remainder = result[3:]
        newline_at = remainder.find("\n")
        if newline_at >= 0:
            inner = remainder[newline_at + 1 :]
            fence_at = inner.rfind("```")
            if fence_at >= 0:
                inner = inner[:fence_at]
            result = inner.strip()
    return result.replace("\r\n", "\n")
|
|
457
|
+
|
|
458
|
+
|
|
459
|
+
def _strip_bullet(line: str) -> str | None:
    """Return the text of a bulleted or numbered list item, else ``None``.

    Glyph bullets are handled by ``_bullet_content``. When that yields nothing
    (no bullet, or an empty one), a ``1.`` / ``1)`` numbered item is tried.
    """
    content = _bullet_content(line)
    if content:
        return content
    numbered = re.match(r"^\s*\d+[.)]\s+(?P<text>.+)$", line)
    if numbered is None:
        return None
    return numbered.group("text").strip()
|
|
465
|
+
|
|
466
|
+
|
|
467
|
+
def _bullet_content(line: str) -> str | None:
|
|
468
|
+
stripped = line.lstrip()
|
|
469
|
+
for glyph in ("- ", "* ", "• ", "– ", "+ "):
|
|
470
|
+
if stripped.startswith(glyph):
|
|
471
|
+
return stripped[len(glyph) :].strip()
|
|
472
|
+
return None
|
|
473
|
+
|
|
474
|
+
|
|
475
|
+
def _strip_category_prefix(text: str) -> tuple[str, ChangelogCategory | None]:
    """Split a leading ``[Category]`` or ``Category:`` marker off ``text``.

    Returns the stripped remaining text and the category resolved by
    ``ChangelogCategory.from_name``, or the stripped text and ``None`` when
    there is no marker.
    """
    found = _CATEGORY_RE.match(text)
    if found is None:
        return text.strip(), None
    label = found.group("bracket") or found.group("prefix") or ""
    remainder = found.group("text").strip()
    return remainder, ChangelogCategory.from_name(label)
|
|
481
|
+
|
|
482
|
+
|
|
483
|
+
def _strip_wrapping_quotes(text: str) -> str:
|
|
484
|
+
pairs = {'"': '"', "'": "'", "`": "`", "“": "”", "‘": "’"}
|
|
485
|
+
stripped = text.strip()
|
|
486
|
+
if len(stripped) >= 2 and pairs.get(stripped[0]) == stripped[-1]:
|
|
487
|
+
return stripped[1:-1].strip()
|
|
488
|
+
return stripped
|
|
489
|
+
|
|
490
|
+
|
|
491
|
+
def _normalize_escaped_whitespace(text: str) -> str:
|
|
492
|
+
real = text.count("\n")
|
|
493
|
+
literal = text.count("\\n")
|
|
494
|
+
if literal == 0 or literal < real:
|
|
495
|
+
return text
|
|
496
|
+
return text.replace("\\r\\n", "\n").replace("\\n", "\n").replace("\\r", "\n").replace("\\t", "\t")
|
|
497
|
+
|
|
498
|
+
|
|
499
|
+
def _extract_tag_lenient(text: str, tag: str) -> str | None:
|
|
500
|
+
lower = text.lower()
|
|
501
|
+
open_tag = f"<{tag}"
|
|
502
|
+
open_pos = lower.find(open_tag)
|
|
503
|
+
if open_pos < 0:
|
|
504
|
+
return None
|
|
505
|
+
open_end = text.find(">", open_pos)
|
|
506
|
+
if open_end < 0:
|
|
507
|
+
return None
|
|
508
|
+
content_start = open_end + 1
|
|
509
|
+
rest = text[content_start:]
|
|
510
|
+
close_pos = rest.find("</")
|
|
511
|
+
return (rest[:close_pos] if close_pos >= 0 else rest).strip()
|
|
512
|
+
|
|
513
|
+
|
|
514
|
+
def _strip_label_prefix(text: str) -> str:
|
|
515
|
+
stripped = text.strip()
|
|
516
|
+
if ":" not in stripped:
|
|
517
|
+
return stripped
|
|
518
|
+
label, remainder = stripped.split(":", 1)
|
|
519
|
+
if label.strip().lower() in {"title", "summary", "description", "result"}:
|
|
520
|
+
return remainder.strip()
|
|
521
|
+
return stripped
|
|
522
|
+
|
|
523
|
+
|
|
524
|
+
def _strip_heading_markers(text: str) -> str:
|
|
525
|
+
stripped = text.strip().lstrip("#").strip()
|
|
526
|
+
for marker in ("**", "*", "__", "_"):
|
|
527
|
+
if stripped.startswith(marker) and stripped.endswith(marker) and len(stripped) > 2 * len(marker):
|
|
528
|
+
stripped = stripped[len(marker) : -len(marker)].strip()
|
|
529
|
+
return stripped
|
|
530
|
+
|
|
531
|
+
|
|
532
|
+
def _parse_heading_line(line: str, *, coerce: bool) -> tuple[str, str | None, str] | None:
    """Parse ``type(scope): summary`` into ``(type, scope, summary)``.

    A type accepted by ``_normalize_commit_type`` is returned in its
    normalized form. With ``coerce`` set, any other bare word is passed
    through ``coerce_commit_type`` unless the summary starts with ``"``,
    ``{`` or ``[``. ``None`` is returned when the line does not qualify.
    """
    parts = _split_heading(line)
    if parts is None:
        return None
    raw_type, scope, summary = parts

    known = _normalize_commit_type(raw_type)
    if known is not None:
        return known, scope, summary

    if not coerce or not _is_bare_word(raw_type) or summary.startswith(('"', "{", "[")):
        return None
    return str(coerce_commit_type(raw_type)), scope, summary
|
|
543
|
+
|
|
544
|
+
|
|
545
|
+
def _split_heading(line: str) -> tuple[str, str | None, str] | None:
|
|
546
|
+
if ":" not in line:
|
|
547
|
+
return None
|
|
548
|
+
type_scope, summary = line.split(":", 1)
|
|
549
|
+
type_scope = type_scope.strip()
|
|
550
|
+
summary = summary.strip()
|
|
551
|
+
if not type_scope or not summary:
|
|
552
|
+
return None
|
|
553
|
+
scope: str | None = None
|
|
554
|
+
if "(" in type_scope:
|
|
555
|
+
p_start = type_scope.find("(")
|
|
556
|
+
p_end = type_scope.find(")", p_start + 1)
|
|
557
|
+
if p_end < 0:
|
|
558
|
+
return None
|
|
559
|
+
commit_type = type_scope[:p_start].strip()
|
|
560
|
+
scope_text = type_scope[p_start + 1 : p_end].strip()
|
|
561
|
+
type_scope = commit_type
|
|
562
|
+
scope = scope_text or None
|
|
563
|
+
if not type_scope:
|
|
564
|
+
return None
|
|
565
|
+
return type_scope, scope, summary
|
|
566
|
+
|
|
567
|
+
|
|
568
|
+
def _is_bare_word(text: str) -> bool:
|
|
569
|
+
return bool(text) and text[0].isalpha() and all(ch.isalpha() or ch == "-" for ch in text)
|
|
570
|
+
|
|
571
|
+
|
|
572
|
+
def parse_changelog_response(text: str) -> dict[str, dict[str, list[str]]]:
    """Parse markdown changelog output into an ``entries`` mapping.

    A line naming a known category starts that section; text after a colon on
    the same line becomes its first entry. Any other ``#`` heading starts a
    section under its own title. A bullet with a ``[Category]`` / ``Category:``
    marker is filed under that category; other bullets, and plain lines inside
    a section, go to the current section. A response containing an
    ``<exception`` tag yields no entries.

    Raises:
        ValueError: if no section ends up with at least one entry.
    """

    body = _clean_markdown_text(text)
    if _has_exception_tag(body):
        return {"entries": {}}

    known = {"Added", "Changed", "Fixed", "Deprecated", "Removed", "Security", "Breaking Changes"}
    entries: dict[str, list[str]] = {}
    section: str | None = None
    for raw in body.splitlines():
        line = raw.strip()
        if not line:
            continue

        name, inline = _changelog_heading(line)
        if name in known:
            section = name
            bucket = entries.setdefault(section, [])
            if inline:
                bucket.append(inline)
            continue
        if name is None and line.startswith("#"):
            section = _strip_heading_markers(line).rstrip(":").strip()
            entries.setdefault(section, [])
            continue

        bullet = _strip_bullet(line)
        if bullet is None:
            # Plain text only counts once a section has been opened.
            if section is not None:
                entries.setdefault(section, []).append(line)
            continue

        item, category = _strip_category_prefix(_strip_heading_markers(bullet))
        if category is not None:
            entries.setdefault(category.value, []).append(item)
            continue
        if section is None:
            guessed, remainder = _changelog_heading(item)
            if guessed in known and remainder:
                entries.setdefault(guessed, []).append(remainder)
                continue
        # NOTE(review): when no section has been opened yet, `section` is None
        # here and the bullet is stored under a None key, which does not match
        # the declared dict[str, ...] type — confirm what callers expect.
        entries.setdefault(section, []).append(item)

    if not any(entries.values()):
        raise ValueError("No changelog entries found in response")
    return {"entries": entries}
|
|
613
|
+
|
|
614
|
+
|
|
615
|
+
def _has_exception_tag(text: str) -> bool:
|
|
616
|
+
return re.search(r"<exception(?:\s|/|>|$)", text, re.IGNORECASE) is not None
|
|
617
|
+
|
|
618
|
+
|
|
619
|
+
def _changelog_heading(line: str) -> tuple[str | None, str]:
    """Read a changelog category, and any inline entry text, from ``line``.

    Heading markers, a trailing colon and wrapping quotes are removed first.
    With a colon left in the line, the part before it is the category
    candidate and the part after it (minus leading ``*``, ``_`` and backticks)
    is the inline text. The category is ``None`` when it is not a known one.
    """
    cleaned = _strip_wrapping_quotes(_strip_heading_markers(line).rstrip(":").strip())
    head, colon, tail = cleaned.partition(":")
    if not colon:
        return _known_category_name(_category_token(cleaned)), ""
    category = _known_category_name(_category_token(head))
    return category, tail.strip().lstrip("*_`").strip()
|
|
629
|
+
|
|
630
|
+
|
|
631
|
+
def _category_token(text: str) -> str:
    """Strip heading markers, emphasis characters, quotes and spaces from both ends of ``text``."""
    unmarked = _strip_heading_markers(text)
    return unmarked.strip("*_`\"'“”‘’ ")
|
|
633
|
+
|
|
634
|
+
|
|
635
|
+
def _known_category_name(text: str) -> str | None:
    """Map ``text`` to the value of a ``ChangelogCategory`` member, ignoring case.

    ``breaking`` is accepted as shorthand for ``ChangelogCategory.BREAKING``.
    ``None`` is returned when no member's value matches.
    """
    wanted = text.strip().lower()
    if wanted == "breaking":
        return ChangelogCategory.BREAKING.value
    matches = (member.value for member in ChangelogCategory if member.value.lower() == wanted)
    return next(matches, None)
|
|
643
|
+
|
|
644
|
+
|
|
645
|
+
def _strip_trailing_period(text: str) -> str:
|
|
646
|
+
return text[:-1].rstrip() if text.rstrip().endswith(".") else text.strip()
|
|
647
|
+
|
|
648
|
+
|
|
649
|
+
def _ensure_sentence(text: str) -> str:
|
|
650
|
+
cleaned = text.strip()
|
|
651
|
+
if not cleaned:
|
|
652
|
+
return ""
|
|
653
|
+
return cleaned if cleaned.endswith((".", "!", "?")) else f"{cleaned}."
|
|
654
|
+
|
|
655
|
+
|
|
656
|
+
def _truncate_summary(text: str, limit: int) -> str:
    """Shorten ``text`` to at most ``limit`` characters.

    A trailing period is removed first. Text that is too long is cut at
    ``limit`` (at least one character), the last space-separated piece of the
    cut is dropped, and trailing spaces and ``,;:-`` are trimmed. If that
    leaves nothing, the plain cut at ``limit`` is trimmed and used instead.
    """
    trailing = " ,;:-"
    body = _strip_trailing_period(text.strip())
    if len(body) <= limit:
        return body
    head = body[: max(1, limit)]
    shortened = head.rsplit(" ", 1)[0].rstrip(trailing)
    if shortened:
        return shortened
    return body[:limit].rstrip(trailing)
|
|
661
|
+
|
|
662
|
+
|
|
663
|
+
def _dominant_area(text: str) -> str | None:
|
|
664
|
+
for line in text.splitlines():
|
|
665
|
+
match = re.search(r"(?:^|\s)(?:[ab]/)?([\w.-]+)(?:/|\.)", line)
|
|
666
|
+
if match:
|
|
667
|
+
return match.group(1).strip("._-").lower() or None
|
|
668
|
+
return None
|
|
669
|
+
|
|
670
|
+
|
|
671
|
+
def _dedupe(values: Iterable[str]) -> list[str]:
|
|
672
|
+
seen: set[str] = set()
|
|
673
|
+
result: list[str] = []
|
|
674
|
+
for value in values:
|
|
675
|
+
key = value.strip().lower()
|
|
676
|
+
if key and key not in seen:
|
|
677
|
+
seen.add(key)
|
|
678
|
+
result.append(value.strip())
|
|
679
|
+
return result
|
|
680
|
+
|
|
681
|
+
|
|
682
|
+
# Format-neutral aliases: each name is bound to the same function object as
# its "*_markdown" / "*_response" counterpart defined earlier in the module.
parse_batch_observations = parse_file_observations_markdown
parse_changelog_entries = parse_changelog_response
parse_compose_binding = parse_compose_binding_markdown
parse_compose_intent = parse_compose_intent_markdown
parse_conventional_analysis = parse_conventional_analysis_markdown
parse_fast_commit = parse_fast_commit_markdown
parse_summary_output = parse_summary_markdown


# Names exported by ``from ... import *``.
__all__ = [
    "analysis_from_mapping",
    "fallback_summary",
    "parse_batch_observations",
    "parse_changelog_entries",
    "parse_changelog_response",
    "parse_compose_binding",
    "parse_compose_binding_markdown",
    "parse_conventional_analysis",
    "parse_conventional_analysis_markdown",
    "parse_compose_intent",
    "parse_compose_intent_markdown",
    "parse_fast_commit",
    "parse_fast_commit_markdown",
    "parse_file_observations_markdown",
    "parse_summary_markdown",
    "parse_summary_output",
    "strip_type_prefix",
]
|