llm-commit-helper 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,7 @@
1
+ """llm-commit-helper: LLM-friendly replacement for git diff --staged."""
2
+
3
+ __version__ = "0.1.0"
4
+
5
+ # Local Variables:
6
+ # eval: (blacken-mode)
7
+ # End:
@@ -0,0 +1,10 @@
1
+ """Entry point: python -m llm_commit_helper"""
2
+
3
+ from llm_commit_helper.cli import main
4
+
5
if __name__ == "__main__":
    # main() returns an integer status (1 when not inside a git repository,
    # 0 otherwise); hand it to the interpreter so the process exit code
    # reflects failures instead of always being 0.
    raise SystemExit(main())
7
+
8
+ # Local Variables:
9
+ # eval: (blacken-mode)
10
+ # End:
@@ -0,0 +1,214 @@
1
+ """CLI entry point: argparse + main pipeline orchestration."""
2
+
3
+ import argparse
4
+ import sys
5
+ from pathlib import Path
6
+ from typing import Optional
7
+
8
+ from llm_commit_helper.config import load_config, Config
9
+ from llm_commit_helper.git_staged import (
10
+ get_staged_files,
11
+ classify_file,
12
+ load_file_contents,
13
+ FileKind,
14
+ FileStatus,
15
+ )
16
+ from llm_commit_helper.submodule import get_submodule_log, format_submodule_section
17
+ from llm_commit_helper.formatters import format_diff
18
+ from llm_commit_helper.output import OutputBuilder, format_file_header
19
+ from llm_commit_helper.utils import find_git_root
20
+
21
+
22
def _parse_args(argv: Optional[list[str]] = None) -> argparse.Namespace:
    """Build the command-line parser and parse ``argv`` with it.

    ``argv`` is passed to argparse unchanged. The resulting namespace has
    ``config`` and ``max_total_size`` (strings, or None when not given) and
    the boolean ``verbose``.
    """
    cli = argparse.ArgumentParser(
        description="LLM-friendly replacement for git diff --staged",
        prog="llm-commit-helper",
    )

    # (flags, keyword settings) for every supported option, in help order.
    option_table = (
        (
            ("--config",),
            {
                "metavar": "PATH",
                "help": "Path to config.jsonc (overrides hierarchy search)",
            },
        ),
        (
            ("--max-total-size",),
            {
                "metavar": "SIZE",
                "help": "Override max_total_size from config (e.g. 500, 20KB)",
            },
        ),
        (
            ("--verbose", "-v"),
            {
                "action": "store_true",
                "help": "Print extra diagnostic info to stderr",
            },
        ),
    )
    for flags, settings in option_table:
        cli.add_argument(*flags, **settings)

    return cli.parse_args(argv)
44
+
45
+
46
def _make_summary_header(
    files: list,
    config: Config,
    config_source: Optional[Path],
) -> str:
    """Render the summary banner placed at the top of the output.

    Counts the staged files per kind (an entry whose ``kind`` is falsy is
    counted as "modified"; kinds outside the known set are not counted) and
    reports which config file was used. ``config`` is accepted but not read.
    """
    known_kinds = (
        "modified",
        "added",
        "deleted",
        "excluded",
        "too_large",
        "submodule",
        "binary",
    )
    tally: dict[str, int] = dict.fromkeys(known_kinds, 0)
    for entry in files:
        label = entry.kind.value if entry.kind else "modified"
        if label in tally:
            tally[label] += 1

    # Only kinds that actually occur are listed, in the fixed order above.
    breakdown = ", ".join(
        f"{amount} {label}" for label, amount in tally.items() if amount > 0
    )

    if config_source:
        origin_line = f"Config: {config_source}"
    else:
        origin_line = "Config: defaults"

    return "\n".join(
        [
            "=== Staged Changes Summary ===",
            f"Files: {len(files)} total ({breakdown})",
            origin_line,
            "",
            "",
        ]
    )
75
+
76
+
77
def _process_file(f, config: Config, git_root: Path, verbose: bool) -> tuple[str, str]:
    """Turn one staged file into its ``(header, body)`` output strings.

    Kinds whose content is never shown get a fixed placeholder body. Modified
    files are loaded and diffed. Any other kind (submodules are handled by the
    caller) yields two empty strings. ``config`` and ``verbose`` are accepted
    but not read here.
    """
    kind = f.kind

    # (kind, header label, placeholder body) for kinds that never show content.
    placeholders = (
        (FileKind.EXCLUDED, "excluded", "[changed - excluded by rule]\n\n"),
        (FileKind.TOO_LARGE, "too_large", "[changed - file too large]\n\n"),
        (FileKind.BINARY, "binary", "[binary file changed]\n\n"),
        (FileKind.ADDED, "added", "[new file - contents not shown]\n\n"),
        (FileKind.DELETED, "deleted", "[file deleted]\n\n"),
    )
    for candidate, label, notice in placeholders:
        if kind == candidate:
            return format_file_header(f.path, label), notice

    if not (kind == FileKind.MODIFIED):
        # Submodule handled separately
        return "", ""

    loaded = load_file_contents(f, git_root)
    before, after = loaded.old_content, loaded.new_content
    if before is None and after is None:
        return (
            format_file_header(loaded.path, "modified"),
            "[could not retrieve file content]\n\n",
        )

    diff_text, is_fmt_only = format_diff(loaded.path, before, after)

    if is_fmt_only:
        header = format_file_header(loaded.path, "modified", "formatting-only")
    else:
        header = format_file_header(loaded.path, "modified")

    if is_fmt_only and not diff_text:
        body = "[no logic changes - formatting only]\n\n"
    else:
        body = (diff_text or "[empty diff]") + "\n\n"
    return header, body
131
+
132
+
133
def main(argv: Optional[list[str]] = None) -> int:
    """Run the whole pipeline and print the report to stdout.

    Steps: parse options, locate the git root, load the configuration
    (applying the ``--max-total-size`` override), classify the staged files,
    emit regular files first and submodules afterwards, then print the built
    output. Returns 1 when not inside a git repository, otherwise 0.
    """
    options = _parse_args(argv)

    repo_root = find_git_root()
    if repo_root is None:
        print("[llm-commit-helper] Error: not inside a git repository", file=sys.stderr)
        return 1

    explicit_config = Path(options.config) if options.config else None
    config = load_config(explicit_config, start=repo_root)

    # Apply CLI override for max_total_size
    if options.max_total_size:
        from llm_commit_helper.utils import parse_size

        config = Config(
            exclude_patterns=config.exclude_patterns,
            max_file_size=config.max_file_size,
            max_total_size=parse_size(options.max_total_size),
            source=config.source,
        )

    if options.verbose:
        diagnostics = (
            f"[llm-commit-helper] git root: {repo_root}",
            f"[llm-commit-helper] config: {config.source or 'defaults'}",
            f"[llm-commit-helper] max_total_size: {config.max_total_size}",
        )
        for message in diagnostics:
            print(message, file=sys.stderr)

    # Get and classify staged files
    staged = get_staged_files(repo_root)
    if not staged:
        print("No staged changes.", file=sys.stderr)
        return 0

    for entry in staged:
        entry.kind = classify_file(entry, config, repo_root)

    summary = _make_summary_header(staged, config, config.source)
    builder = OutputBuilder(config.max_total_size)

    # Regular files first; submodules are emitted in the second pass below.
    for entry in staged:
        if entry.kind != FileKind.SUBMODULE:
            file_header, file_body = _process_file(
                entry, config, repo_root, options.verbose
            )
            accepted = builder.add_section(
                file_header + file_body, file_path=entry.path
            )
            if not accepted and options.verbose:
                print(
                    f"[llm-commit-helper] Budget exceeded, truncating at {entry.path}",
                    file=sys.stderr,
                )

    # Process submodules
    for entry in staged:
        if entry.kind == FileKind.SUBMODULE:
            log_lines = get_submodule_log(
                entry.path, entry.old_hash, entry.new_hash, repo_root
            )
            section = (
                format_submodule_section(
                    entry.path, entry.old_hash, entry.new_hash, log_lines
                )
                + "\n\n"
            )
            builder.add_section(section, file_path=entry.path)

    print(
        builder.build(
            header=summary,
            footer_template="\n=== End of Staged Changes ({total_chars} chars) ===\n",
        )
    )
    return 0
210
+
211
+
212
+ # Local Variables:
213
+ # eval: (blacken-mode)
214
+ # End:
@@ -0,0 +1,132 @@
1
+ """Configuration loading: JSONC parsing, hierarchical search, Config dataclass."""
2
+
3
+ import json
4
+ import re
5
+ import sys
6
+ from dataclasses import dataclass, field
7
+ from pathlib import Path
8
+ from typing import Optional
9
+
10
+ from llm_commit_helper.utils import find_git_root, parse_size
11
+
12
+
13
# Limits used when no config file (or CLI flag) overrides them.
DEFAULT_MAX_FILE_SIZE = parse_size("200MB")
DEFAULT_MAX_TOTAL_SIZE = 20_000

# Config file name, the per-directory folder that may contain it, and the
# per-user fallback directory consulted last by _candidate_paths().
CONFIG_FILENAME = "config.jsonc"
CONFIG_DIR_NAME = ".llm-commit-helper"
GLOBAL_CONFIG_DIR = Path.home().joinpath(".config", "llm-commit-helper")
19
+
20
+
21
@dataclass
class Config:
    """Effective settings for one run.

    Constructed with no arguments it carries the built-in defaults;
    ``_load_config_file`` fills it from the ``rules`` object of a config file.
    """

    # Exclusion patterns (``rules.exclude`` in the config file).
    exclude_patterns: list[str] = field(default_factory=list)
    # Per-file size limit (``rules.max_file_size``).
    max_file_size: int = DEFAULT_MAX_FILE_SIZE
    # Budget handed to the output builder (``rules.max_total_size``).
    max_total_size: int = DEFAULT_MAX_TOTAL_SIZE
    # File the settings were loaded from; None means defaults.
    source: Optional[Path] = None
27
+
28
+
29
def _strip_jsonc_comments(text: str) -> str:
    """Remove ``//`` line comments and trailing commas from JSONC text.

    The text is scanned once while tracking whether the cursor is inside a
    JSON string, so ``//`` sequences, commas and brackets that are part of a
    string value are left untouched. Escapes are tracked as state rather than
    by peeking at the previous character, so a string ending in an escaped
    backslash (``"C:\\\\"``) is closed correctly.

    Args:
        text: Raw JSONC document.

    Returns:
        The text with comments removed (the newline ending a comment is kept)
        and with any comma that directly precedes ``}`` or ``]`` dropped,
        together with the whitespace between them.
    """
    out: list[str] = []
    in_string = False
    escaped = False
    i = 0
    length = len(text)

    while i < length:
        ch = text[i]

        if in_string:
            out.append(ch)
            if escaped:
                # This character is consumed by the preceding backslash.
                escaped = False
            elif ch == "\\":
                escaped = True
            elif ch == '"':
                in_string = False
            i += 1
            continue

        if ch == '"':
            in_string = True
            out.append(ch)
        elif ch == "/" and text.startswith("//", i):
            # Skip to end of line; the newline itself is emitted next round.
            newline_at = text.find("\n", i)
            i = length if newline_at == -1 else newline_at
            continue
        elif ch in "}]":
            # Drop a trailing comma (and the whitespace after it) that sits
            # directly before this closing bracket.
            j = len(out) - 1
            while j >= 0 and out[j].isspace():
                j -= 1
            if j >= 0 and out[j] == ",":
                del out[j:]
            out.append(ch)
        else:
            out.append(ch)
        i += 1

    return "".join(out)
54
+
55
+
56
def _parse_jsonc(text: str) -> dict:
    """Decode JSONC: strip comments and trailing commas, then parse as JSON."""
    return json.loads(_strip_jsonc_comments(text))
60
+
61
+
62
def _load_config_file(path: Path) -> Optional[Config]:
    """Load and validate a single config file.

    Args:
        path: Location of the JSONC config file.

    Returns:
        A ``Config`` whose ``source`` is ``path``, or None when the file does
        not exist, cannot be read or decoded, is not valid JSONC, or does not
        have the expected structure. Every case except "does not exist"
        prints a warning to stderr.
    """
    if not path.exists():
        return None

    def _warn(reason: object) -> None:
        print(
            f"[llm-commit-helper] Warning: failed to parse {path}: {reason}",
            file=sys.stderr,
        )

    try:
        raw = path.read_text(encoding="utf-8")
        data = _parse_jsonc(raw)
    except (OSError, ValueError) as e:
        # ValueError covers json.JSONDecodeError and also UnicodeDecodeError,
        # which read_text raises for a file that is not valid UTF-8.
        _warn(e)
        return None

    # A syntactically valid document may still be a list, string or number.
    if not isinstance(data, dict):
        _warn("top-level value must be an object")
        return None

    rules = data.get("rules", {})
    if not isinstance(rules, dict):
        _warn('"rules" must be an object')
        return None

    exclude = rules.get("exclude", [])
    if isinstance(exclude, str):
        # A lone pattern; list("*.py") would split it into single characters.
        exclude = [exclude]
    elif not isinstance(exclude, list):
        _warn('"rules.exclude" must be a list of patterns')
        return None

    max_file_size_raw = rules.get("max_file_size", DEFAULT_MAX_FILE_SIZE)
    max_total_size_raw = rules.get("max_total_size", DEFAULT_MAX_TOTAL_SIZE)

    return Config(
        exclude_patterns=list(exclude),
        max_file_size=parse_size(max_file_size_raw),
        max_total_size=parse_size(max_total_size_raw),
        source=path,
    )
87
+
88
+
89
def _candidate_paths(start: Optional[Path] = None) -> list[Path]:
    """List the config file locations to try, most specific first.

    Starting at ``start`` (the current directory when omitted) and walking up
    through its parents, each directory contributes ``config.jsonc`` and
    ``.llm-commit-helper/config.jsonc``. The walk stops after the git root, or
    at the filesystem root when there is none. The global config path is
    always appended last.
    """
    origin = (start or Path.cwd()).resolve()
    boundary = find_git_root(origin)

    found: list[Path] = []
    for directory in (origin, *origin.parents):
        found += [
            directory / CONFIG_FILENAME,
            directory / CONFIG_DIR_NAME / CONFIG_FILENAME,
        ]
        if boundary and directory == boundary:
            break

    # Global config
    found.append(GLOBAL_CONFIG_DIR / CONFIG_FILENAME)
    return found
105
+
106
+
107
def load_config(
    config_path: Optional[Path] = None,
    start: Optional[Path] = None,
) -> Config:
    """Load configuration.

    Args:
        config_path: Explicit config file. When given, only this file is
            considered and the hierarchy search is skipped.
        start: Directory where the hierarchy search begins (passed on to
            ``_candidate_paths``).

    Returns:
        The first config that loads successfully, or a default ``Config``
        when nothing usable is found.
    """
    if config_path is not None:
        cfg = _load_config_file(config_path)
        if cfg is not None:
            return cfg
        # _load_config_file prints its own warning when the file exists but
        # cannot be used, so "not found" is reported only when the file is
        # actually missing.
        if not config_path.exists():
            print(
                f"[llm-commit-helper] Warning: config file not found: {config_path}",
                file=sys.stderr,
            )
        return Config()

    for candidate in _candidate_paths(start):
        cfg = _load_config_file(candidate)
        if cfg is not None:
            return cfg

    return Config()  # defaults
128
+
129
+
130
+ # Local Variables:
131
+ # eval: (blacken-mode)
132
+ # End:
@@ -0,0 +1,91 @@
1
+ """Diff engine: difflib wrapper with per-hunk formatting annotation."""
2
+
3
+ import difflib
4
+ from typing import Callable
5
+
6
+
7
def make_unified_diff(
    old_lines: list[str],
    new_lines: list[str],
    fromfile: str = "old",
    tofile: str = "new",
    n: int = 3,
) -> list[str]:
    """Return the unified diff between two line lists as a list of lines.

    Thin wrapper around ``difflib.unified_diff`` that materialises the
    generator; ``n`` is the number of context lines. The result includes the
    ``---``/``+++`` file headers and is empty when the inputs are equal.
    """
    diff_iter = difflib.unified_diff(old_lines, new_lines, fromfile, tofile, n=n)
    return [*diff_iter]
24
+
25
+
26
def _split_hunks(diff_lines: list[str]) -> list[tuple[int, int]]:
    """Locate the hunks of a unified diff.

    Args:
        diff_lines: Lines of a unified diff, file headers included.

    Returns:
        One ``(start, end)`` index pair per hunk, in order. ``start`` is the
        index of the hunk's ``@@`` line and ``end`` is exclusive (the next
        ``@@`` line or the end of the input). The list is empty when there is
        no ``@@`` line. Header lines are not returned; they are whatever
        precedes the first ``start``.
    """
    starts = [i for i, line in enumerate(diff_lines) if line.startswith("@@")]
    # Each hunk runs up to the start of the next one; the last runs to the end.
    ends = starts[1:] + [len(diff_lines)]
    return list(zip(starts, ends))
43
+
44
+
45
def annotate_formatting_hunks(
    old_lines: list[str],
    new_lines: list[str],
    is_formatting_only: Callable[[list[str], list[str]], bool],
    n: int = 3,
) -> tuple[list[str], bool]:
    """Produce annotated unified diff lines.

    For each hunk that is formatting-only, inserts a [formatting-only] marker
    after the @@ line.

    Args:
        old_lines: Lines of the old version.
        new_lines: Lines of the new version.
        is_formatting_only: Predicate called once per hunk with the removed
            and the added lines (diff prefix character stripped).
        n: Number of context lines in the diff.

    Returns:
        ``(diff_lines, all_hunks_are_formatting_only)``. An empty diff yields
        ``([], True)``; a diff without any ``@@`` line is returned unchanged
        with ``False``.
    """
    raw = make_unified_diff(old_lines, new_lines, n=n)
    if not raw:
        return [], True

    hunk_spans = _split_hunks(raw)
    if not hunk_spans:
        return raw, False

    # Copy header lines (before first hunk)
    result: list[str] = list(raw[: hunk_spans[0][0]])
    all_formatting = True

    for start, end in hunk_spans:
        marker_line, *content = raw[start:end]
        # The ---/+++ file headers precede the first hunk, so inside a hunk
        # every "-"/"+" prefixed line is a real change, including lines whose
        # own text starts with "--" or "++" (SQL comments, "++i", ...).
        removed = [line[1:] for line in content if line.startswith("-")]
        added = [line[1:] for line in content if line.startswith("+")]

        fmt_only = is_formatting_only(removed, added)
        if not fmt_only:
            all_formatting = False

        result.append(marker_line)  # the @@ line
        if fmt_only:
            result.append("[formatting-only]\n")
        result.extend(content)

    return result, all_formatting
87
+
88
+
89
+ # Local Variables:
90
+ # eval: (blacken-mode)
91
+ # End:
@@ -0,0 +1,35 @@
1
+ """Formatter dispatcher: selects the right formatter by file extension."""
2
+
3
+ from pathlib import Path
4
+ from typing import Optional
5
+
6
+
7
def format_diff(
    path: str,
    old_content: Optional[str],
    new_content: Optional[str],
) -> tuple[str, bool]:
    """Diff one file with the formatter that matches its extension.

    ``.py`` uses the Python formatter, ``.v``/``.sv`` the Verilog formatter,
    everything else the generic one. The extension is compared
    case-insensitively and the chosen formatter module is imported lazily.

    Returns (diff_text, is_formatting_only).
    is_formatting_only=True means no logic changes were found.
    """
    suffix = Path(path).suffix.lower()

    if suffix == ".py":
        from llm_commit_helper.formatters.python_fmt import (
            format_python_diff as formatter,
        )
    elif suffix in (".v", ".sv"):
        from llm_commit_helper.formatters.verilog_fmt import (
            format_verilog_diff as formatter,
        )
    else:
        from llm_commit_helper.formatters.generic_fmt import (
            format_generic_diff as formatter,
        )

    return formatter(path, old_content, new_content)
31
+
32
+
33
+ # Local Variables:
34
+ # eval: (blacken-mode)
35
+ # End:
@@ -0,0 +1,47 @@
1
+ """Generic formatter: per-hunk whitespace normalization."""
2
+
3
+ import difflib
4
+ import re
5
+ from typing import Optional
6
+
7
+ from llm_commit_helper.diff_engine import make_unified_diff, annotate_formatting_hunks
8
+
9
+
10
def _normalize_line(line: str) -> str:
    """Trim the line and collapse every internal whitespace run to one space."""
    # split() without arguments discards leading/trailing whitespace and
    # splits on whitespace runs, so re-joining with single spaces normalises.
    return " ".join(line.split())
13
+
14
+
15
def _hunk_is_formatting_only(removed: list[str], added: list[str]) -> bool:
    """Tell whether a hunk's removed and added lines differ only in whitespace.

    Both sides are normalised line by line and compared as lists, so the
    number and order of lines must match as well.
    """
    return list(map(_normalize_line, removed)) == list(map(_normalize_line, added))
20
+
21
+
22
def format_generic_diff(
    path: str,
    old_content: Optional[str],
    new_content: Optional[str],
) -> tuple[str, bool]:
    """Diff two versions of a file, marking whitespace-only hunks.

    Missing content (None) is treated as an empty file. ``path`` is accepted
    but not read here.

    Returns (diff_text, is_formatting_only); an empty diff gives ("", True).
    """
    before = (old_content or "").splitlines(keepends=True)
    after = (new_content or "").splitlines(keepends=True)

    annotated, only_formatting = annotate_formatting_hunks(
        before, after, _hunk_is_formatting_only
    )
    if annotated:
        return "".join(annotated), only_formatting

    return "", True  # no diff at all
43
+
44
+
45
+ # Local Variables:
46
+ # eval: (blacken-mode)
47
+ # End:
@@ -0,0 +1,81 @@
1
+ """Python formatter: use black to separate logic from formatting changes."""
2
+
3
+ import sys
4
+ from typing import Optional
5
+
6
+ from llm_commit_helper.utils import make_temp_file, run_command
7
+ from llm_commit_helper.diff_engine import annotate_formatting_hunks
8
+ from llm_commit_helper.formatters.generic_fmt import _hunk_is_formatting_only
9
+
10
+
11
def _run_black(path_str: str) -> bool:
    """Invoke ``black --quiet`` on ``path_str``; True when it exits with 0."""
    status, _stdout, _stderr = run_command(["black", "--quiet", path_str])
    return status == 0
17
+
18
+
19
def format_python_diff(
    path: str,
    old_content: Optional[str],
    new_content: Optional[str],
) -> tuple[str, bool]:
    """Format Python diff using black to isolate logic changes.

    Both versions are written to temporary ``.py`` files, black is run on
    each, and the files are read back and diffed, so differences that black
    normalises away do not show up.

    Args:
        path: Path of the file; used in the fallback warning and passed to
            the generic formatter.
        old_content: Previous content, or None (treated as empty).
        new_content: New content, or None (treated as empty).

    Returns:
        ``(diff_text, is_formatting_only)``. ``("", True)`` means the two
        versions are identical after black. Falls back to the generic diff of
        the raw contents if black is not available or fails on either version.
    """
    old_tmp = None
    new_tmp = None
    try:
        old_tmp = make_temp_file(suffix=".py", content=old_content or "")
        new_tmp = make_temp_file(suffix=".py", content=new_content or "")

        # Short-circuit: once black has failed on the old version the result
        # is the generic fallback anyway, so the second run is skipped.
        if not (_run_black(str(old_tmp)) and _run_black(str(new_tmp))):
            print(
                f"[llm-commit-helper] black not available or failed for {path}, falling back to generic",
                file=sys.stderr,
            )
            from llm_commit_helper.formatters.generic_fmt import format_generic_diff

            return format_generic_diff(path, old_content, new_content)

        # Read back what black left in the temp files.
        old_lines = old_tmp.read_text(encoding="utf-8").splitlines(keepends=True)
        new_lines = new_tmp.read_text(encoding="utf-8").splitlines(keepends=True)

        diff_lines, all_formatting = annotate_formatting_hunks(
            old_lines, new_lines, _hunk_is_formatting_only
        )

        if not diff_lines:
            return "", True  # identical after formatting

        return "".join(diff_lines), all_formatting

    finally:
        # Remove the temp files on every exit path.
        for tmp in (old_tmp, new_tmp):
            if tmp and tmp.exists():
                tmp.unlink()
77
+
78
+
79
+ # Local Variables:
80
+ # eval: (blacken-mode)
81
+ # End: