lgit-cli 3.7.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (54)
  1. lgit/__init__.py +75 -0
  2. lgit/__main__.py +8 -0
  3. lgit/analysis.py +326 -0
  4. lgit/api.py +1077 -0
  5. lgit/cache.py +338 -0
  6. lgit/changelog.py +523 -0
  7. lgit/cli.py +1104 -0
  8. lgit/compose.py +2110 -0
  9. lgit/config.py +437 -0
  10. lgit/diffing.py +384 -0
  11. lgit/errors.py +137 -0
  12. lgit/git.py +852 -0
  13. lgit/map_reduce.py +508 -0
  14. lgit/markdown_output.py +709 -0
  15. lgit/models.py +924 -0
  16. lgit/normalization.py +411 -0
  17. lgit/patch.py +784 -0
  18. lgit/profile.py +426 -0
  19. lgit/py.typed +0 -0
  20. lgit/repo.py +287 -0
  21. lgit/resources/__init__.py +1 -0
  22. lgit/resources/commit_types.json +242 -0
  23. lgit/resources/prompts/analysis/default.md +237 -0
  24. lgit/resources/prompts/analysis/markdown.md +112 -0
  25. lgit/resources/prompts/changelog/default.md +89 -0
  26. lgit/resources/prompts/changelog/markdown.md +60 -0
  27. lgit/resources/prompts/compose-bind/default.md +40 -0
  28. lgit/resources/prompts/compose-bind/markdown.md +41 -0
  29. lgit/resources/prompts/compose-intent/default.md +63 -0
  30. lgit/resources/prompts/compose-intent/markdown.md +59 -0
  31. lgit/resources/prompts/fast/default.md +46 -0
  32. lgit/resources/prompts/fast/markdown.md +51 -0
  33. lgit/resources/prompts/map/default.md +67 -0
  34. lgit/resources/prompts/map/markdown.md +63 -0
  35. lgit/resources/prompts/reduce/default.md +81 -0
  36. lgit/resources/prompts/reduce/markdown.md +68 -0
  37. lgit/resources/prompts/summary/default.md +74 -0
  38. lgit/resources/prompts/summary/markdown.md +77 -0
  39. lgit/resources/validation_data.json +1 -0
  40. lgit/rewrite.py +392 -0
  41. lgit/style.py +295 -0
  42. lgit/templates.py +385 -0
  43. lgit/testing/__init__.py +62 -0
  44. lgit/testing/compare.py +57 -0
  45. lgit/testing/fixture.py +386 -0
  46. lgit/testing/report.py +201 -0
  47. lgit/testing/runner.py +256 -0
  48. lgit/tokens.py +90 -0
  49. lgit/validation.py +545 -0
  50. lgit_cli-3.7.0.dist-info/METADATA +288 -0
  51. lgit_cli-3.7.0.dist-info/RECORD +54 -0
  52. lgit_cli-3.7.0.dist-info/WHEEL +4 -0
  53. lgit_cli-3.7.0.dist-info/entry_points.txt +2 -0
  54. lgit_cli-3.7.0.dist-info/licenses/LICENSE +21 -0
lgit/map_reduce.py ADDED
@@ -0,0 +1,508 @@
1
+ """Map-reduce analysis for large git diffs."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import asyncio
6
+ import json
7
+ from collections.abc import Iterable, Mapping, Sequence
8
+ from dataclasses import dataclass
9
+ from pathlib import Path
10
+ from typing import Any
11
+
12
+ from .api import (
13
+ OneShotSpec,
14
+ build_analysis_schema,
15
+ format_types_description,
16
+ render_prompt,
17
+ run_oneshot,
18
+ strict_json_schema,
19
+ )
20
+ from .diffing import FileDiff, parse_diff, reconstruct_diff
21
+ from .markdown_output import analysis_from_mapping, fallback_summary, parse_conventional_analysis_markdown
22
+ from .models import AnalysisDetail, ConventionalAnalysis, resolve_model_name
23
+ from .tokens import create_token_counter
24
+
25
# Per-file token ceiling: a file above it forces map-reduce and is truncated
# before its diff is rendered for a map batch.
MAX_FILE_TOKENS = 50_000
# Size of the semaphore that bounds concurrently running map batches.
MAP_PHASE_CONCURRENCY = 16
# Upper bound on the number of other files listed in a batch's context header.
MAX_CONTEXT_FILES = 20
28
+
29
+
30
+ @dataclass(frozen=True, slots=True)
31
+ class FileObservation:
32
+ """Factual observations extracted for one changed file."""
33
+
34
+ file: str
35
+ observations: tuple[str, ...]
36
+ additions: int = 0
37
+ deletions: int = 0
38
+
39
+
40
def should_use_map_reduce(diff: str, config: Any, counter: Any | None = None) -> bool:
    """Return whether ``diff`` should be analyzed with map-reduce.

    Map-reduce is chosen when it is enabled on ``config`` (``map_reduce_enabled``,
    default ``True``) and either a single included file exceeds
    ``MAX_FILE_TOKENS`` or the running token total of included files reaches
    ``config.map_reduce_threshold`` (default 5000).  A diff with no included
    files never qualifies.

    Args:
        diff: Unified diff text.
        config: Settings object read with ``getattr``.
        counter: Optional token counter; one is created from ``config`` when
            omitted.
    """

    if not bool(getattr(config, "map_reduce_enabled", True)):
        return False
    counter = counter or create_token_counter(config)
    # The threshold is loop-invariant, so read and parse it once.
    threshold = int(getattr(config, "map_reduce_threshold", 5000))
    total_tokens = 0
    for changed in _included_files(parse_diff(diff), config):
        file_tokens = changed.token_estimate(counter)
        if file_tokens > MAX_FILE_TOKENS:
            return True
        total_tokens += file_tokens
        if total_tokens >= threshold:
            return True
    # Reaching this point means the threshold was never met (or there were no
    # included files), so the answer is always negative.
    return False
57
+
58
+
59
def build_file_batches(files: Sequence[FileDiff], counter: Any, budget: int) -> list[list[int]]:
    """Batch every file index in ``files`` under the given token budget."""

    every_index = range(len(files))
    return _build_file_batches_for_indices(files, every_index, counter, budget)
63
+
64
+
65
def build_llm_file_batches(files: Sequence[FileDiff], counter: Any, budget: int) -> list[list[int]]:
    """Batch the indices of files that are not binary under the token budget."""

    text_indices: list[int] = []
    for position, candidate in enumerate(files):
        if not candidate.is_binary:
            text_indices.append(position)
    return _build_file_batches_for_indices(files, text_indices, counter, budget)
70
+
71
+
72
async def observe_diff_files(
    diff: str, map_model_name: str, config: Any, counter: Any | None = None
) -> list[FileObservation]:
    """Parse ``diff``, drop excluded files and run the map phase on the rest.

    Raises:
        ValueError: If no file is left after filtering.
    """

    token_counter = counter if counter else create_token_counter(config)
    relevant = _included_files(parse_diff(diff), config)
    if relevant:
        return await _map_phase(relevant, map_model_name, config, token_counter)
    raise ValueError("No relevant files to summarize after filtering")
82
+
83
+
84
async def reduce_phase(
    observations: Sequence[FileObservation], stat: str, scope_candidates: str, model_name: str, config: Any
) -> ConventionalAnalysis:
    """Synthesize map observations into final conventional analysis.

    The observations are serialized to JSON, rendered into the ``reduce``
    prompt (``markdown`` or ``default`` variant, chosen by
    ``config.markdown_output``) and sent through ``run_oneshot``.  The response
    is accepted, in order of preference, as a ready ``ConventionalAnalysis``, a
    mapping, or markdown text.  When none of those is usable, a local fallback
    analysis is built from the observations and ``stat``.

    Args:
        observations: Per-file observations produced by the map phase.
        stat: Diff stat text passed to the prompt and to the fallback summary.
        scope_candidates: Scope candidates text passed to the prompt.
        model_name: Model used for the reduce call.
        config: Settings object read with ``getattr``.
    """

    type_enum = list(getattr(config, "types", {}) or {"chore": None})
    observations_json = json.dumps(
        [_observation_to_mapping(item) for item in observations], ensure_ascii=False, indent=2
    )
    variant = "markdown" if bool(getattr(config, "markdown_output", True)) else "default"
    system_prompt, user_prompt = _render_reduce_prompt(
        variant,
        observations_json,
        stat,
        scope_candidates,
        format_types_description(config),
    )
    response = await run_oneshot(
        config,
        OneShotSpec(
            operation="map-reduce/reduce",
            model=resolve_model_name(model_name),
            prompt_family="reduce",
            prompt_variant=variant,
            system_prompt=system_prompt,
            user_prompt=user_prompt,
            tool_name="create_conventional_analysis",
            tool_description="Analyze file observations and classify as a conventional commit",
            schema=build_analysis_schema(type_enum, config),
            progress_label="reduce file observations",
            cacheable=True,
        ),
    )
    # Some callers/tests may hand back the payload directly instead of a
    # response wrapper exposing ``.output``.
    output = response.output if hasattr(response, "output") else response
    default_type = type_enum[0] if type_enum else "chore"
    if isinstance(output, ConventionalAnalysis):
        return output
    if isinstance(output, Mapping):
        return analysis_from_mapping(output, default_type=default_type)
    text_content = getattr(response, "text_content", None)
    if text_content:
        try:
            return parse_conventional_analysis_markdown(text_content, default_type=default_type)
        except ValueError:
            # Unparseable markdown is not fatal; fall through to the local fallback.
            pass
    # Forward ``stat`` so the fallback summary is not built from an empty stat.
    return _fallback_reduce_analysis(observations, config, stat)
130
+
131
+
132
async def run_map_reduce(*args: Any, **kwargs: Any) -> ConventionalAnalysis:
    """Run map and reduce phases for a large diff.

    Accepts Python order ``(config, stat, diff, scope_candidates=...)`` and the
    Rust-port order ``(diff, stat, scope_candidates, model_name, config, counter)``.
    The Rust-port order is selected when the first positional argument is a
    string.  Arguments missing positionally are looked up in ``kwargs``.

    The reduce model is the first set value among ``model_name``,
    ``config.analysis_model``, ``config.model`` and ``"claude-opus-4.5"``.  The
    map model is the first set value among ``config.summary_model``,
    ``config.model`` and the resolved reduce model.

    Raises:
        KeyError: If ``config`` is supplied neither positionally nor by keyword.
        ValueError: If the diff has no relevant files after filtering.
    """

    if args and isinstance(args[0], str):
        diff = args[0]
        stat = args[1] if len(args) > 1 else kwargs.get("stat", "")
        scope_candidates = args[2] if len(args) > 2 else kwargs.get("scope_candidates", "")
        model_name = args[3] if len(args) > 3 else kwargs.get("model_name")
        config = args[4] if len(args) > 4 else kwargs["config"]
        counter = args[5] if len(args) > 5 else kwargs.get("counter")
    else:
        config = args[0] if args else kwargs["config"]
        stat = args[1] if len(args) > 1 else kwargs.get("stat", "")
        diff = args[2] if len(args) > 2 else kwargs.get("diff", "")
        scope_candidates = args[3] if len(args) > 3 else kwargs.get("scope_candidates", "")
        model_name = kwargs.get("model_name")
        counter = kwargs.get("counter")

    counter = counter or create_token_counter(config)
    # A setting that exists but is None/empty falls through to the next
    # candidate instead of being stringified into a bogus name such as "None".
    reduce_choice = (
        model_name
        or getattr(config, "analysis_model", None)
        or getattr(config, "model", None)
        or "claude-opus-4.5"
    )
    reduce_model = resolve_model_name(str(reduce_choice))
    map_choice = getattr(config, "summary_model", None) or getattr(config, "model", None) or reduce_model
    map_model = resolve_model_name(str(map_choice))
    observations = await observe_diff_files(str(diff), map_model, config, counter)
    return await reduce_phase(observations, str(stat), str(scope_candidates), reduce_model, config)
161
+
162
+
163
async def _map_phase(
    files: Sequence[FileDiff], map_model_name: str, config: Any, counter: Any
) -> list[FileObservation]:
    """Observe every file and return the observations in input order.

    Binary files get a canned observation without a model call.  All other
    files are grouped into token-budgeted batches
    (``config.map_batch_token_budget``, default 16000) that run concurrently,
    bounded by ``MAP_PHASE_CONCURRENCY``.

    Raises:
        RuntimeError: If a file ends up without an observation.
    """
    headers = _ContextHeaders(files)
    budget = int(getattr(config, "map_batch_token_budget", 16000))
    batches = build_llm_file_batches(files, counter, budget)

    # One slot per input file; binary files are filled in right away.
    slots: list[FileObservation | None] = [
        FileObservation(item.filename, ("Binary file changed.",), item.additions, item.deletions)
        if item.is_binary
        else None
        for item in files
    ]

    limiter = asyncio.Semaphore(MAP_PHASE_CONCURRENCY)
    total_batches = len(batches)

    async def run_batch(number: int, members: list[int]) -> list[tuple[int, FileObservation]]:
        async with limiter:
            selected = [files[position] for position in members]
            header = headers.header_for_files([item.filename for item in selected])
            label = f"map batch {number + 1}/{total_batches} ({len(selected)} files)"
            produced = await _map_file_batch(selected, header, map_model_name, config, counter, label)
            # strict=True guards against a batch returning the wrong number of results.
            return list(zip(members, produced, strict=True))

    pending = [run_batch(number, members) for number, members in enumerate(batches)]
    for pairs in await asyncio.gather(*pending):
        for position, observation in pairs:
            slots[position] = observation

    ordered: list[FileObservation] = []
    for position, observation in enumerate(slots):
        if observation is None:
            raise RuntimeError(f"Missing map observation for {files[position].filename}")
        ordered.append(observation)
    return ordered
202
+
203
+
204
async def _map_file_batch(
    files: Sequence[FileDiff], context_header: str, model_name: str, config: Any, counter: Any, progress_label: str
) -> list[FileObservation]:
    """Send one batch of files to the map model and return one observation per file."""
    prompt_files = [
        {"path": item.filename, "diff": _render_file_diff_for_batch(item, counter)} for item in files
    ]
    variant = "markdown" if bool(getattr(config, "markdown_output", True)) else "default"
    system_prompt, user_prompt = _render_map_prompt(variant, prompt_files, context_header)
    spec = OneShotSpec(
        operation="map-reduce/map",
        model=resolve_model_name(model_name),
        prompt_family="map",
        prompt_variant=variant,
        system_prompt=system_prompt,
        user_prompt=user_prompt,
        tool_name="create_file_observations",
        tool_description="Extract observations from a batch of file changes",
        schema=_batch_observation_schema(),
        progress_label=progress_label,
        cacheable=True,
    )
    response = await run_oneshot(config, spec)
    # Fall back to the response object itself when it has no ``output`` attribute.
    payload = getattr(response, "output", response)
    return _map_batch_response_to_observations(
        files,
        payload,
        getattr(response, "text_content", None),
        getattr(response, "stop_reason", None),
    )
231
+
232
+
233
def _map_batch_response_to_observations(
    files: Sequence[FileDiff], output: Any, text_content: str | None, stop_reason: str | None
) -> list[FileObservation]:
    """Pair each batch file with its entry in the model output.

    Files without a matching entry get a generic fallback observation.  An
    entry with no observations also gets the fallback text, but only when the
    response stopped with ``"max_tokens"``.
    """
    entries = _observation_entries(output)
    has_text = bool(text_content and text_content.strip())
    if has_text and not entries:
        # Free text but no structured entries: use the fallback for every file.
        return [_fallback_file_observation(item) for item in files]

    claimed = [False] * len(entries)
    truncated = stop_reason == "max_tokens"
    collected: list[FileObservation] = []
    for item in files:
        match_idx = _find_observation_entry(item.filename, entries, claimed, files)
        if match_idx is not None:
            claimed[match_idx] = True
            notes = _parse_observations(entries[match_idx].get("observations", []))
            if truncated and not notes:
                notes = [_fallback_observation_text(item.filename)]
            observation = FileObservation(item.filename, tuple(notes), item.additions, item.deletions)
        else:
            observation = _fallback_file_observation(item)
        collected.append(observation)
    return collected
254
+
255
+
256
def _observation_entries(output: Any) -> list[dict[str, Any]]:
    """Extract the per-file entry dicts from a map response payload.

    A mapping payload contributes its ``"files"`` value and a list payload is
    used as-is.  Anything else, including a missing, ``None`` or non-iterable
    ``"files"`` value, yields no entries.  Items that are not mappings are
    skipped, and each kept item is copied into a plain ``dict``.
    """
    if isinstance(output, Mapping):
        raw = output.get("files")
    elif isinstance(output, list):
        raw = output
    else:
        raw = None
    # Guard against ``{"files": None}`` / scalar values, which would otherwise
    # raise TypeError when iterated; strings never contain mapping items.
    if raw is None or isinstance(raw, (str, bytes)) or not isinstance(raw, Iterable):
        return []
    return [dict(item) for item in raw if isinstance(item, Mapping)]
264
+
265
+
266
def _find_observation_entry(
    filename: str, entries: Sequence[Mapping[str, Any]], used: Sequence[bool], batch_files: Sequence[FileDiff]
) -> int | None:
    """Return the index of the first unused entry that matches ``filename``.

    Strategies are tried in order over all unused entries: exact path match,
    basename match (only when the basename is unique within the batch), then a
    path-suffix match in either direction.  Returns ``None`` when nothing
    matches.
    """
    target_name = _path_basename(filename)
    same_name_count = sum(1 for candidate in batch_files if _path_basename(candidate.filename) == target_name)
    name_is_unique = same_name_count == 1

    def exact(reported: str) -> bool:
        return reported == filename

    def same_basename(reported: str) -> bool:
        return name_is_unique and _path_basename(reported) == target_name

    def suffix_related(reported: str) -> bool:
        return _path_suffix_matches(reported, filename)

    for accepts in (exact, same_basename, suffix_related):
        for position, entry in enumerate(entries):
            if used[position]:
                continue
            if accepts(str(entry.get("path", ""))):
                return position
    return None
281
+
282
+
283
def _clean_observation_items(items: Iterable[Any]) -> list[str]:
    """Stringify and strip ``items``, dropping ``None`` and blank entries."""
    return [text for item in items if item is not None and (text := str(item).strip())]


def _parse_observations(value: Any) -> list[str]:
    """Normalize an ``observations`` value into a list of non-empty strings.

    A string is decoded as a JSON list when it starts with ``[`` and parses as
    one; otherwise it is treated as one observation per line with leading
    ``-``, ``*``, ``•`` and space characters removed.  Any other iterable is
    stringified item by item.  ``None`` items (JSON ``null``) are skipped
    rather than turned into the text ``"None"``.  Non-iterable values yield an
    empty list.
    """
    if isinstance(value, str):
        stripped = value.strip()
        if stripped.startswith("["):
            try:
                decoded = json.loads(stripped)
            except json.JSONDecodeError:
                decoded = None  # not JSON after all; parse it as bullet lines
            if isinstance(decoded, list):
                return _clean_observation_items(decoded)
        bullets = (line.lstrip("-*• ").strip() for line in stripped.splitlines())
        return [bullet for bullet in bullets if bullet]
    if isinstance(value, Iterable):
        return _clean_observation_items(value)
    return []
297
+
298
+
299
def _build_file_batches_for_indices(
    files: Sequence[FileDiff], indices: Iterable[int], counter: Any, budget: int
) -> list[list[int]]:
    """Pack ``indices`` into consecutive batches that stay within ``budget`` tokens.

    The budget is clamped to at least 1.  A file whose estimate alone exceeds
    the budget closes the open batch and is emitted as a batch of its own.
    """
    limit = max(1, int(budget))
    grouped: list[list[int]] = []
    pending: list[int] = []
    pending_tokens = 0
    for position in indices:
        cost = files[position].token_estimate(counter)
        oversized = cost > limit
        # Close the open batch when this file cannot join it.
        if pending and (oversized or pending_tokens + cost > limit):
            grouped.append(pending)
            pending, pending_tokens = [], 0
        if oversized:
            grouped.append([position])
        else:
            pending.append(position)
            pending_tokens += cost
    if pending:
        grouped.append(pending)
    return grouped
324
+
325
+
326
def _included_files(files: Sequence[FileDiff], config: Any) -> list[FileDiff]:
    """Return the files whose name does not end with an excluded pattern.

    Patterns come from ``config.excluded_files`` and are matched as plain
    filename suffixes.  A missing or ``None`` setting excludes nothing, a bare
    string is treated as a single pattern, and empty patterns are ignored
    because every filename ends with the empty string.
    """
    configured = getattr(config, "excluded_files", None) or ()
    if isinstance(configured, str):
        # Iterating a bare string would turn each character into a pattern.
        configured = (configured,)
    suffixes = tuple(text for text in map(str, configured) if text)
    if not suffixes:
        return list(files)
    return [file for file in files if not file.filename.endswith(suffixes)]
329
+
330
+
331
def _render_file_diff_for_batch(file: FileDiff, counter: Any) -> str:
    """Render one file's diff text, truncating it when it exceeds ``MAX_FILE_TOKENS``."""
    oversized = file.token_estimate(counter) > MAX_FILE_TOKENS
    if not oversized:
        return _reconstruct_single_file_diff(file)
    # Truncate a copy so the caller's FileDiff is left untouched.
    shortened = FileDiff(file.filename, file.header, file.content, file.additions, file.deletions, file.is_binary)
    shortened.truncate(MAX_FILE_TOKENS * 4)
    return reconstruct_diff([shortened])
337
+
338
+
339
def _reconstruct_single_file_diff(file: FileDiff) -> str:
    """Join a file's header and content with a newline; header alone when content is empty."""
    if not file.content:
        return file.header
    return f"{file.header}\n{file.content}"
341
+
342
+
343
def _fallback_file_observation(file: FileDiff) -> FileObservation:
    """Build the generic single-sentence observation used when the model gave none."""
    placeholder = _fallback_observation_text(file.filename)
    return FileObservation(
        file=file.filename,
        observations=(placeholder,),
        additions=file.additions,
        deletions=file.deletions,
    )
345
+
346
+
347
def _fallback_observation_text(filename: str) -> str:
    """Return the placeholder sentence ``Updated <basename>.`` for ``filename``."""
    short_name = _path_basename(filename)
    return f"Updated {short_name}."
349
+
350
+
351
def _fallback_reduce_analysis(
    observations: Sequence[FileObservation], config: Any, stat: str = ""
) -> ConventionalAnalysis:
    """Build a ``chore`` analysis locally from the raw observations.

    The summary comes from ``fallback_summary`` limited to
    ``config.summary_hard_limit`` (default 128).  At most the first six
    non-empty observations become details.
    """
    details: list[str] = []
    for item in observations:
        details.extend(text for text in item.observations if text)
    limit = int(getattr(config, "summary_hard_limit", 128))
    summary = fallback_summary(stat=stat, details=details, limit=limit)
    bullet_points = tuple(AnalysisDetail.simple(_ensure_sentence(text)) for text in details[:6])
    return ConventionalAnalysis(
        commit_type="chore",
        summary=summary,
        details=bullet_points,
        issue_refs=(),
    )
362
+
363
+
364
def _ensure_sentence(text: str) -> str:
    """Strip ``text`` and append a period unless it is empty or already ends in ``.``, ``!`` or ``?``."""
    cleaned = text.strip()
    if cleaned == "" or cleaned[-1] in ".!?":
        return cleaned
    return cleaned + "."
367
+
368
+
369
def _batch_observation_schema() -> dict[str, Any]:
    """Return the tool schema for a map batch: a required ``files`` array of path/observations objects."""
    entry_schema = {
        "type": "object",
        "properties": {
            "path": {"type": "string", "description": "Exact input file path."},
            "observations": {"type": "array", "items": {"type": "string"}},
        },
        "required": ["path", "observations"],
        "additionalProperties": False,
    }
    properties = {
        "files": {
            "type": "array",
            "description": "Per-file observations for every file in the map batch.",
            "items": entry_schema,
        }
    }
    return strict_json_schema(properties, ["files"])
388
+
389
+
390
def _render_map_prompt(variant: str, files: Sequence[Mapping[str, str]], context_header: str) -> tuple[str, str]:
    """Render the map prompt as a ``(system, user)`` pair.

    ``templates.render_map_prompt`` is tried first.  If importing or calling it
    fails for any reason, the generic ``render_prompt`` is used instead.
    """
    try:
        from .templates import render_map_prompt

        rendered = render_map_prompt(variant, files, context_header)
        system_text, user_text = rendered.system, rendered.user
    except Exception:
        # Best-effort: any failure falls back to the generic renderer.
        fallback_context = {"files": files, "context_header": context_header}
        return render_prompt("map", variant, fallback_context)
    return system_text, user_text
398
+
399
+
400
def _render_reduce_prompt(
    variant: str, observations: str, stat: str, scope_candidates: str, types_description: str
) -> tuple[str, str]:
    """Render the reduce prompt as a ``(system, user)`` pair.

    ``templates.render_reduce_prompt`` is tried first.  If importing or calling
    it fails for any reason, the generic ``render_prompt`` is used instead.
    """
    try:
        from .templates import render_reduce_prompt

        rendered = render_reduce_prompt(variant, observations, stat, scope_candidates, types_description)
        system_text, user_text = rendered.system, rendered.user
    except Exception:
        # Best-effort: any failure falls back to the generic renderer.
        fallback_context = {
            "observations": observations,
            "stat": stat,
            "scope_candidates": scope_candidates,
            "types_description": types_description,
        }
        return render_prompt("reduce", variant, fallback_context)
    return system_text, user_text
419
+
420
+
421
def _observation_to_mapping(item: FileObservation) -> dict[str, Any]:
    """Convert an observation into a plain dict, with ``observations`` as a list."""
    return dict(
        file=item.file,
        observations=list(item.observations),
        additions=item.additions,
        deletions=item.deletions,
    )
428
+
429
+
430
def _path_basename(path: str) -> str:
    """Return the final component of ``path``, or ``path`` itself when that component is empty."""
    final_component = Path(path).name
    return final_component if final_component else path
432
+
433
+
434
def _path_suffix_matches(left: str, right: str) -> bool:
    """Return whether either path is a path suffix of the other."""
    if _path_has_suffix(left, right):
        return True
    return _path_has_suffix(right, left)
436
+
437
+
438
def _path_has_suffix(path: str, suffix: str) -> bool:
    """Return whether ``path`` equals ``suffix`` or ends with it right after a ``/`` or ``\\`` separator."""
    if path == suffix:
        return True
    return path.endswith(("/" + suffix, "\\" + suffix))
440
+
441
+
442
class _ContextHeaders:
    """Builds the "other files in this change" header shown with each map batch.

    For more than 100 files a single fixed header is used and no per-file
    details are kept.
    """

    def __init__(self, files: Sequence[FileDiff]) -> None:
        total = len(files)
        if total > 100:
            self.large_commit_header = f"(Large commit with {total} total files)"
            self.files = []
        else:
            self.large_commit_header = None
            # (path, changed line count, inferred description) per file.
            self.files = [
                (
                    entry.filename,
                    entry.additions + entry.deletions,
                    _infer_file_description(entry.filename, entry.content),
                )
                for entry in files
            ]

    def header_for_files(self, current_files: Sequence[str]) -> str:
        """Return the header for a batch made of ``current_files``.

        The files outside the batch are listed, largest change first, capped at
        ``MAX_CONTEXT_FILES`` with a trailing count of the remainder.  Returns
        an empty string when the batch covers every file.
        """
        if self.large_commit_header:
            return self.large_commit_header
        in_batch = set(current_files)
        siblings = [entry for entry in self.files if entry[0] not in in_batch]
        if not siblings:
            return ""
        ranked = sorted(siblings, key=lambda entry: entry[1], reverse=True)
        visible = ranked[:MAX_CONTEXT_FILES]
        lines = ["OTHER FILES IN THIS CHANGE:"]
        lines.extend(f"- {path} ({size} lines): {desc}" for path, size, desc in visible)
        hidden = len(siblings) - len(visible)
        if hidden > 0:
            lines.append(f"... and {hidden} more files")
        return "\n".join(lines)
470
+
471
+
472
def _infer_file_description(filename: str, content: str) -> str:
    """Guess a short role label for a changed file.

    Rules are checked in priority order and the first hit wins.  Filename-based
    hints come first, then keywords found in ``content``.  ``"source code"`` is
    the catch-all.
    """
    lowered = filename.lower()
    extension = Path(filename).suffix.lower()

    # (condition, label) pairs derived from the filename alone.
    name_rules = (
        ("test" in lowered, "test file"),
        ("prompt" in lowered or "system" in lowered, "prompt template"),
        (extension == ".md", "documentation"),
        ("config" in lowered or extension in {".toml", ".yaml", ".yml"}, "configuration"),
        ("error" in lowered, "error definitions"),
        ("type" in lowered, "type definitions"),
        (lowered.endswith(("mod.rs", "lib.rs")), "module exports"),
        (lowered.endswith(("main.rs", "main.go", "main.py")), "entry point"),
    )
    for matched, label in name_rules:
        if matched:
            return label

    # Content is inspected only when no filename rule applied.
    content_rules = (
        (("class ", "def ", "fn "), "implementation"),
        (("struct ", "enum "), "type definitions"),
        (("async ", "await"), "async code"),
    )
    for needles, label in content_rules:
        if any(needle in content for needle in needles):
            return label
    return "source code"
498
+
499
+
500
# Names exported by ``from lgit.map_reduce import *``.
__all__ = [
    "FileObservation",
    "build_file_batches",
    "build_llm_file_batches",
    "observe_diff_files",
    "reduce_phase",
    "run_map_reduce",
    "should_use_map_reduce",
]