llm-commit-helper 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,116 @@
1
+ """Verilog formatter: delete AUTO-generated sections before diffing."""
2
+
3
+ import re
4
+ import sys
5
+ from typing import Optional
6
+
7
+ from llm_commit_helper.utils import make_temp_file, run_command
8
+ from llm_commit_helper.diff_engine import annotate_formatting_hunks
9
+ from llm_commit_helper.formatters.generic_fmt import _hunk_is_formatting_only
10
+
11
+
12
# Regex for verilog-mode AUTO* macro keywords.
#
# Every alternative is wrapped in \b...\b, so a keyword only matches as a whole
# word and the order of the alternatives is irrelevant (AUTOINST can never
# shadow AUTOINSTPARAM).  AUTOTEMPLATE is kept for backward compatibility; the
# spelling verilog-mode itself uses is AUTO_TEMPLATE.
AUTO_PATTERN = re.compile(
    r"\b(AUTOARG|AUTOINPUT|AUTOOUTPUT|AUTOINOUT|AUTOINSTPARAM|AUTOINST"
    r"|AUTOWIRE|AUTOREG|AUTOREGINPUT|AUTOLOGIC|AUTOASCIIENUM|AUTOSENSE"
    r"|AUTOUNUSED|AUTOTEMPLATE|AUTO_LISP"
    r"|AUTO_TEMPLATE|AUTORESET|AUTOTIEOFF|AUTOUNDEF|AUTOINPUTREG"
    r"|AUTOOUTPUTEVERY|AUTOINOUTMODULE|AUTOINOUTCOMP|AUTOINOUTIN"
    r"|AUTOINOUTPARAM|AUTOINOUTMODPORT|AUTOASSIGNMODPORT"
    r"|AUTOINSERTLISP|AUTOINSERTLAST)\b"
)
18
+
19
+
20
def _has_auto_macros(content: str) -> bool:
    """Report whether *content* mentions at least one AUTO* Verilog macro."""
    first_hit = AUTO_PATTERN.search(content)
    return first_hit is not None
23
+
24
+
25
def _run_emacs_delete_auto(file_path: str) -> bool:
    """Strip the AUTO-generated sections of *file_path* in place using emacs.

    Emacs is started in batch mode on the file and asked to call
    ``verilog-batch-delete-auto`` followed by ``save-buffer``.  Removing the
    expanded output (AUTOWIRE, AUTOINST, etc.) leaves only the hand-written
    source, so later diffs do not depend on how the AUTOs were ordered.

    Returns True when the emacs command exits with status 0.
    """
    emacs_cmd = ["emacs", "--batch", file_path]
    for lisp_function in ("verilog-batch-delete-auto", "save-buffer"):
        emacs_cmd += ["-f", lisp_function]

    exit_code, _stdout, _stderr = run_command(emacs_cmd)
    return exit_code == 0
44
+
45
+
46
def format_verilog_diff(
    path: str,
    old_content: Optional[str],
    new_content: Optional[str],
) -> tuple[str, bool]:
    """Format a Verilog diff after deleting AUTO-generated sections.

    verilog-batch-delete-auto removes all AUTO-expanded blocks (AUTOWIRE,
    AUTOINST, etc.), leaving only hand-written source.  Diffing the stripped
    versions avoids spurious ordering differences in generated code.

    Args:
        path: Path of the file being diffed; only used to pick the temp-file
            extension (``.sv`` or ``.v``) and in the fallback message.
        old_content: Previous text of the file, or None (treated as empty).
        new_content: New text of the file, or None (treated as empty).

    Returns:
        ``(diff_text, is_formatting_only)``.

    When neither version contains an AUTO* macro the raw contents are diffed
    directly and no temp file or emacs process is created.  When emacs is not
    available or fails, the result of ``format_generic_diff`` is returned.
    """
    old_text = old_content or ""
    new_text = new_content or ""
    has_auto = _has_auto_macros(old_text + new_text)

    old_tmp = None
    new_tmp = None
    try:
        if has_auto:
            # Temp files are only needed as input/output for emacs.
            ext = ".sv" if path.endswith(".sv") else ".v"
            old_tmp = make_temp_file(suffix=ext, content=old_text)
            new_tmp = make_temp_file(suffix=ext, content=new_text)

            # Short-circuit: no point launching emacs again once it failed.
            stripped = _run_emacs_delete_auto(str(old_tmp)) and _run_emacs_delete_auto(
                str(new_tmp)
            )
            if not stripped:
                print(
                    f"[llm-commit-helper] emacs not available or failed for {path}, falling back to generic",
                    file=sys.stderr,
                )
                from llm_commit_helper.formatters.generic_fmt import format_generic_diff

                return format_generic_diff(path, old_content, new_content)

            old_processed = old_tmp.read_text(encoding="utf-8")
            new_processed = new_tmp.read_text(encoding="utf-8")
        else:
            old_processed = old_text
            new_processed = new_text

        old_lines = old_processed.splitlines(keepends=True)
        new_lines = new_processed.splitlines(keepends=True)

        diff_lines, all_formatting = annotate_formatting_hunks(
            old_lines, new_lines, _hunk_is_formatting_only
        )

        if not diff_lines:
            # Nothing left to show once generated code is out of the picture.
            if has_auto:
                return "[all changes are AUTO-generated - no user code changes]", True
            return "", True

        diff_text = "".join(diff_lines)
        if has_auto and all_formatting:
            diff_text = f"[formatting-only after AUTO deletion]\n{diff_text}"

        return diff_text, all_formatting

    finally:
        # Runs on every exit path, including the generic fallback above.
        for tmp in (old_tmp, new_tmp):
            if tmp and tmp.exists():
                tmp.unlink()
112
+
113
+
114
+ # Local Variables:
115
+ # eval: (blacken-mode)
116
+ # End:
@@ -0,0 +1,244 @@
1
+ """Git interaction: staged file listing, classification, content retrieval."""
2
+
3
+ import sys
4
+ from dataclasses import dataclass
5
+ from enum import Enum
6
+ from pathlib import Path
7
+ from typing import Optional
8
+
9
+ from llm_commit_helper.config import Config
10
+ from llm_commit_helper.utils import find_git_root, glob_match, run_command
11
+
12
+
13
class FileStatus(Enum):
    """Change status of a staged path (derived from git's name-status letter)."""

    ADDED = "added"
    MODIFIED = "modified"
    DELETED = "deleted"
    RENAMED = "renamed"
18
+
19
+
20
class FileKind(Enum):
    """Category assigned to a staged file by classify_file()."""

    # Path matched one of the configured exclude patterns.
    EXCLUDED = "excluded"
    # Staged size exceeds max_file_size.
    TOO_LARGE = "too_large"
    # New file - content is not shown.
    ADDED = "added"
    # File removed by this change.
    DELETED = "deleted"
    # Gitlink / submodule entry.
    SUBMODULE = "submodule"
    # Regular text change that gets a diff.
    MODIFIED = "modified"
    # Binary file.
    BINARY = "binary"
28
+
29
+
30
@dataclass
class StagedFile:
    """One staged path plus everything later stages attach to it."""

    # Path relative to the git root.
    path: str
    status: FileStatus
    is_submodule: bool = False
    # Submodules only: commit hash before the change.
    old_hash: Optional[str] = None
    # Submodules only: commit hash after the change.
    new_hash: Optional[str] = None
    # Filled in from classify_file().
    kind: Optional[FileKind] = None
    # Text before / after the change; populated by load_file_contents().
    old_content: Optional[str] = None
    new_content: Optional[str] = None
40
+
41
+
42
+ def _parse_name_status(output: str) -> list[tuple[str, str, Optional[str]]]:
43
+ """Parse git diff --name-status output into (status_char, path, old_path) tuples."""
44
+ results = []
45
+ for line in output.splitlines():
46
+ if not line.strip():
47
+ continue
48
+ parts = line.split("\t")
49
+ status_char = parts[0][0] # A, M, D, R, C, etc.
50
+ if status_char in ("R", "C") and len(parts) >= 3:
51
+ # Renamed: R100\told_path\tnew_path
52
+ results.append((status_char, parts[2], parts[1]))
53
+ elif len(parts) >= 2:
54
+ results.append((status_char, parts[1], None))
55
+ return results
56
+
57
+
58
def _get_submodule_hashes(git_root: Path) -> dict[str, tuple[str, str]]:
    """Return ``{path: (old_hash, new_hash)}`` for staged submodule changes.

    Runs ``git diff --staged --raw`` in *git_root* and keeps the records whose
    old or new mode is the gitlink mode 160000.  An empty dict is returned
    when the git command fails.
    """
    gitlink_mode = "160000"

    rc, out, _ = run_command(
        ["git", "diff", "--staged", "--raw"],
        cwd=git_root,
    )
    hashes: dict[str, tuple[str, str]] = {}
    if rc != 0:
        return hashes

    for line in out.splitlines():
        # Format: :old_mode new_mode old_hash new_hash status\tpath[\tnew_path]
        if not line.startswith(":"):
            continue
        parts = line.split("\t")
        if len(parts) < 2:
            continue
        meta = parts[0].split()
        if len(meta) < 5:
            continue
        old_mode = meta[0].lstrip(":")  # the record's leading ':' sticks to old_mode
        new_mode, old_hash, new_hash = meta[1], meta[2], meta[3]
        if gitlink_mode not in (old_mode, new_mode):
            continue
        # Rename/copy records carry two paths; the last one is the path the
        # entry has after the change, which is what callers look up.
        hashes[parts[-1]] = (old_hash, new_hash)

    return hashes
89
+
90
+
91
def _get_submodule_set(git_root: Path) -> set[str]:
    """Return the set of all submodule paths registered in .gitmodules.

    Queries ``git config --file .gitmodules`` from *git_root*.  An empty set
    is returned when the command fails (for example when there is no
    .gitmodules file).
    """
    rc, out, _ = run_command(
        [
            "git",
            "config",
            "--file",
            ".gitmodules",
            # NUL-terminated records with a newline between key and value, so
            # submodule names and paths containing spaces stay intact.
            "-z",
            "--get-regexp",
            # Anchored: only submodule.<name>.path keys, not any key that
            # merely contains the word "path".
            r"^submodule\..*\.path$",
        ],
        cwd=git_root,
    )
    paths: set[str] = set()
    if rc != 0:
        return paths
    for record in out.split("\0"):
        _key, sep, value = record.partition("\n")
        if sep and value:
            paths.add(value)
    return paths
105
+
106
+
107
def get_staged_files(git_root: Optional[Path] = None) -> list[StagedFile]:
    """List every currently staged change as a StagedFile.

    When *git_root* is omitted it is looked up with find_git_root().  An empty
    list is returned (with a message on stderr) if no repository is found or
    if ``git diff --staged --name-status`` fails.
    """
    root = find_git_root() if git_root is None else git_root
    if root is None:
        print("[llm-commit-helper] Error: not inside a git repository", file=sys.stderr)
        return []

    rc, out, err = run_command(["git", "diff", "--staged", "--name-status"], cwd=root)
    if rc != 0:
        print(f"[llm-commit-helper] git diff failed: {err}", file=sys.stderr)
        return []

    entries = _parse_name_status(out)
    submodule_paths = _get_submodule_set(root)
    submodule_hashes = _get_submodule_hashes(root)

    # Copies (C) and any unknown letter are reported as MODIFIED.
    status_by_letter = {
        "A": FileStatus.ADDED,
        "M": FileStatus.MODIFIED,
        "D": FileStatus.DELETED,
        "R": FileStatus.RENAMED,
        "C": FileStatus.MODIFIED,
    }

    def _to_staged(letter: str, staged_path: str) -> StagedFile:
        before, after = submodule_hashes.get(staged_path, (None, None))
        return StagedFile(
            path=staged_path,
            status=status_by_letter.get(letter, FileStatus.MODIFIED),
            is_submodule=staged_path in submodule_paths,
            old_hash=before,
            new_hash=after,
        )

    return [_to_staged(letter, staged_path) for letter, staged_path, _old in entries]
151
+
152
+
153
def _file_size_in_index(path: str, git_root: Path) -> int:
    """Size in bytes of the staged (index) version of *path*; 0 on any error."""
    rc, out, _ = run_command(["git", "cat-file", "-s", f":0:{path}"], cwd=git_root)
    if rc != 0:
        return 0

    try:
        size = int(out.strip())
    except ValueError:
        # Output was not a plain integer.
        size = 0
    return size
165
+
166
+
167
def _is_binary(path: str, git_root: Path) -> bool:
    """Heuristically decide whether the staged change to *path* is binary.

    True when ``git diff --staged --numstat`` succeeds and its output begins
    with ``-<TAB>-``.
    """
    rc, numstat, _ = run_command(
        ["git", "diff", "--staged", "--numstat", "--", path],
        cwd=git_root,
    )
    return rc == 0 and numstat.startswith("-\t-")
176
+
177
+
178
def classify_file(f: StagedFile, config: Config, git_root: Path) -> FileKind:
    """Pick the FileKind for a staged file according to the config rules.

    Checks run in this order and the first hit wins: submodule, exclude
    patterns, added, deleted, staged size above ``config.max_file_size``,
    binary.  Anything left over is a regular MODIFIED file.
    """
    if f.is_submodule:
        return FileKind.SUBMODULE

    if any(glob_match(pattern, f.path) for pattern in config.exclude_patterns):
        return FileKind.EXCLUDED

    # Added files (no content shown) and deleted files need no further checks.
    for status, kind in (
        (FileStatus.ADDED, FileKind.ADDED),
        (FileStatus.DELETED, FileKind.DELETED),
    ):
        if f.status == status:
            return kind

    # The size of the staged version is checked before the binary heuristic.
    if _file_size_in_index(f.path, git_root) > config.max_file_size:
        return FileKind.TOO_LARGE

    return FileKind.BINARY if _is_binary(f.path, git_root) else FileKind.MODIFIED
207
+
208
+
209
def get_file_content(path: str, git_root: Path, staged: bool = True) -> Optional[str]:
    """Fetch a file's text from git via ``git show``.

    With ``staged=True`` the index version (``:0:<path>``) is read, otherwise
    the committed one (``HEAD:<path>``).  Returns None when git fails.
    """
    ref = f":0:{path}" if staged else f"HEAD:{path}"
    rc, text, _ = run_command(["git", "show", ref], cwd=git_root)
    return text if rc == 0 else None
220
+
221
+
222
def load_file_contents(f: StagedFile, git_root: Path) -> StagedFile:
    """Return a copy of *f* with old/new content loaded.

    Only MODIFIED and DELETED files are processed; any other kind is returned
    unchanged.  The old content always comes from HEAD; the new content comes
    from the index and is only fetched for MODIFIED files (None for DELETED).
    """
    if f.kind not in (FileKind.MODIFIED, FileKind.DELETED):
        return f

    wants_new = f.kind == FileKind.MODIFIED
    before = get_file_content(f.path, git_root, staged=False)
    after = get_file_content(f.path, git_root, staged=True) if wants_new else None

    return StagedFile(
        path=f.path,
        status=f.status,
        is_submodule=f.is_submodule,
        old_hash=f.old_hash,
        new_hash=f.new_hash,
        kind=f.kind,
        old_content=before,
        new_content=after,
    )
240
+
241
+
242
+ # Local Variables:
243
+ # eval: (blacken-mode)
244
+ # End:
@@ -0,0 +1,83 @@
1
+ """Output assembly: size-budgeted output builder."""
2
+
3
+ from dataclasses import dataclass, field
4
+ from typing import Optional
5
+
6
+
7
@dataclass
class FileSummary:
    """Per-file record: path, label, body text and a truncation flag."""

    path: str
    # e.g. "modified", "added", "excluded", ...
    label: str
    # The body text for this file section.
    content: str
    truncated: bool = False
13
+
14
+
15
class OutputBuilder:
    """Collects output sections without exceeding a max_total_size budget.

    Sizes are measured with ``len()`` on the section text.  Once a section
    has been rejected the builder stays truncated and rejects every later
    section as well, remembering the file paths it was given for them.
    """

    def __init__(self, max_total_size: int) -> None:
        self._max = max_total_size
        self._sections: list[str] = []
        self._current_size: int = 0
        self._truncated: bool = False
        self._truncated_files: list[str] = []

    def _remaining(self) -> int:
        """Budget still unused by the accepted sections."""
        return self._max - self._current_size

    def add_section(self, text: str, file_path: Optional[str] = None) -> bool:
        """Try to append *text*.

        Returns True if the section was stored, False if it was dropped
        because the budget is (or already was) exceeded.  A dropped section's
        *file_path*, when given, is recorded for the truncation notice.
        """
        fits = not self._truncated and self._current_size + len(text) <= self._max
        if not fits:
            self._truncated = True
            if file_path:
                self._truncated_files.append(file_path)
            return False

        self._sections.append(text)
        self._current_size += len(text)
        return True

    def build(self, header: str, footer_template: str) -> str:
        """Join header, accepted sections, truncation notice and footer.

        *footer_template* is formatted with ``total_chars``: the length of
        everything that precedes the footer.
        """
        pieces = [header, *self._sections]

        if self._truncated:
            pieces.append("\n[OUTPUT TRUNCATED - budget exceeded]\n")
            if self._truncated_files:
                pieces.append("Remaining files (not shown):\n")
                pieces.extend(f" {name}\n" for name in self._truncated_files)

        total_chars = sum(map(len, pieces))
        pieces.append(footer_template.format(total_chars=total_chars))
        return "".join(pieces)

    @property
    def is_truncated(self) -> bool:
        """True once any section has been rejected."""
        return self._truncated

    @property
    def current_size(self) -> int:
        """Combined length of the accepted sections."""
        return self._current_size
70
+
71
+
72
def format_file_header(path: str, label: str, extra: str = "") -> str:
    """Build the one-line header that opens a file section.

    A non-empty *extra* is appended as a second bracketed tag.
    """
    extra_tag = f" [{extra}]" if extra else ""
    return f"--- File: {path} [{label}]{extra_tag} ---\n"
79
+
80
+
81
+ # Local Variables:
82
+ # eval: (blacken-mode)
83
+ # End:
@@ -0,0 +1,96 @@
1
+ """Submodule log: fetch commit log between old and new hashes."""
2
+
3
+ import sys
4
+ from pathlib import Path
5
+ from typing import Optional
6
+
7
+ from llm_commit_helper.utils import run_command
8
+
9
+
10
+ _NULL_HASH = set(["0" * 7, "0" * 8, "0" * 40])
11
+
12
+
13
+ def _is_null_hash(h: str) -> bool:
14
+ """Return True if hash is the all-zeros null hash (new submodule addition)."""
15
+ return not h.strip("0")
16
+
17
+
18
def get_submodule_log(
    submodule_path: str,
    old_hash: Optional[str],
    new_hash: Optional[str],
    git_root: Path,
) -> list[str]:
    """Collect ``git log --oneline`` entries describing a submodule change.

    For an update, the ``old_hash..new_hash`` range is listed.  For a newly
    added submodule (old hash is all zeros) the last five commits reachable
    from *new_hash* are listed instead.  The command runs inside the
    submodule directory.

    Returns an empty list when either hash is missing, when the submodule
    directory does not exist (not initialized), or when git fails.
    """
    if not (old_hash and new_hash):
        return []

    submodule_dir = git_root / submodule_path
    if not submodule_dir.is_dir():
        print(
            f"[llm-commit-helper] Submodule not initialized: {submodule_path}",
            file=sys.stderr,
        )
        return []

    if _is_null_hash(old_hash):
        # Newly added submodule - no old..new range exists, show recent history.
        log_cmd = ["git", "log", "--oneline", "-5", new_hash]
    else:
        log_cmd = ["git", "log", "--oneline", f"{old_hash}..{new_hash}"]

    rc, out, err = run_command(log_cmd, cwd=submodule_dir)
    if rc != 0:
        print(
            f"[llm-commit-helper] Could not get submodule log for {submodule_path}: {err.strip()}",
            file=sys.stderr,
        )
        return []

    return [entry for entry in out.splitlines() if entry.strip()]
62
+
63
+
64
def format_submodule_section(
    submodule_path: str,
    old_hash: Optional[str],
    new_hash: Optional[str],
    log_lines: list[str],
) -> str:
    """Render the output section for one submodule.

    The section is a header line, an "Added at" line (old hash missing or all
    zeros) or an "Updated" line, then one line per log entry.  Hashes are
    shortened to their first 8 characters; a missing new hash shows as "?".
    """
    header = f"--- Submodule: {submodule_path} ---"
    new_short = (new_hash or "?")[:8]

    if not old_hash or _is_null_hash(old_hash):
        summary = f"Added at: {new_short}"
    else:
        summary = f"Updated: {old_hash[:8]} -> {new_short}"

    entries = [f" {entry}" for entry in log_lines] if log_lines else [" [no log available]"]
    return "\n".join([header, summary, *entries])
92
+
93
+
94
+ # Local Variables:
95
+ # eval: (blacken-mode)
96
+ # End:
@@ -0,0 +1,124 @@
1
+ """Utility functions: subprocess, size parsing, glob matching, git root."""
2
+
3
+ import fnmatch
4
+ import subprocess
5
+ import sys
6
+ import tempfile
7
+ from pathlib import Path
8
+ from typing import Optional
9
+
10
+
11
COMMAND_TIMEOUT = 30  # seconds


def run_command(
    args: list[str],
    cwd: Optional[Path] = None,
    check: bool = False,
) -> tuple[int, str, str]:
    """Run a subprocess command and return (returncode, stdout, stderr).

    Args:
        args: Program and arguments, passed to subprocess.run as a list
            (no shell is involved).
        cwd: Working directory for the child process, or None for the
            current one.
        check: Forwarded to subprocess.run; when True a non-zero exit raises
            subprocess.CalledProcessError.

    Returns:
        The child's exit status and its decoded stdout/stderr.  Failures to
        run the command at all are reported as ``(1, "", <reason>)``: a
        timeout after COMMAND_TIMEOUT seconds, a missing executable, or any
        other OSError raised while starting the process.

    Never raises on non-zero exit unless check=True.
    """
    try:
        result = subprocess.run(
            args,
            cwd=cwd,
            capture_output=True,
            text=True,
            # Output that is not valid in the locale encoding (e.g. binary
            # file content) must not crash the caller with UnicodeDecodeError.
            errors="replace",
            timeout=COMMAND_TIMEOUT,
            check=check,
        )
    except subprocess.TimeoutExpired:
        print(f"[llm-commit-helper] Timeout running: {' '.join(args)}", file=sys.stderr)
        return 1, "", "timeout"
    except FileNotFoundError:
        return 1, "", f"command not found: {args[0]}"
    except OSError as exc:
        # PermissionError, NotADirectoryError, ...: same "could not run"
        # contract as a missing executable.
        return 1, "", f"cannot run {args[0]}: {exc}"
    return result.returncode, result.stdout, result.stderr
39
+
40
+
41
+ def parse_size(value: str | int | float) -> int:
42
+ """Parse a human-readable size string into bytes.
43
+
44
+ Accepts: 200MB, 20KB, 4096, 1.5GB, etc.
45
+ Returns the size in bytes as an integer.
46
+ """
47
+ if isinstance(value, (int, float)):
48
+ return int(value)
49
+
50
+ s = str(value).strip().upper()
51
+ suffixes = {
52
+ "GB": 1024**3,
53
+ "MB": 1024**2,
54
+ "KB": 1024,
55
+ "B": 1,
56
+ }
57
+ for suffix, multiplier in suffixes.items():
58
+ if s.endswith(suffix):
59
+ number = s[: -len(suffix)].strip()
60
+ return int(float(number) * multiplier)
61
+ return int(float(s))
62
+
63
+
64
def glob_match(pattern: str, path: str) -> bool:
    """Return True if path matches a glob pattern.

    Backslashes in both arguments are treated as "/".  A path matches when

    * fnmatch accepts the whole path, or
    * fnmatch accepts just the file name, or
    * the pattern contains exactly one ``**`` and the path agrees with the
      text before it (prefix) and after it (suffix).

    When the pattern separates ``**`` from its prefix or suffix with a "/",
    that side must line up with a whole path segment: ``build/**`` matches
    ``build`` and ``build/x`` but not ``builder/x``, and ``**/foo.py`` matches
    ``foo.py`` and ``a/foo.py`` but not ``barfoo.py``.
    """
    # Normalize separators
    path = path.replace("\\", "/")
    pattern = pattern.replace("\\", "/")

    # Whole path, then bare file name.
    if fnmatch.fnmatch(path, pattern):
        return True
    if fnmatch.fnmatch(Path(path).name, pattern):
        return True

    if "**" not in pattern:
        return False
    pieces = pattern.split("**")
    if len(pieces) != 2:
        # More than one "**": only the plain fnmatch attempts above apply.
        return False

    raw_prefix, raw_suffix = pieces
    prefix = raw_prefix.rstrip("/")
    suffix = raw_suffix.lstrip("/")
    if not prefix and not suffix:
        return False

    if prefix:
        if raw_prefix.endswith("/"):
            # "dir/**": the path must be dir itself or live underneath it.
            if path != prefix and not path.startswith(prefix + "/"):
                return False
        elif not path.startswith(prefix):
            return False

    if suffix:
        if raw_suffix.startswith("/"):
            # "**/name": name must start at the root or right after a "/".
            if not (fnmatch.fnmatch(path, suffix) or fnmatch.fnmatch(path, f"*/{suffix}")):
                return False
        elif not fnmatch.fnmatch(path, f"*{suffix}"):
            return False

    return True
97
+
98
+
99
def find_git_root(start: Optional[Path] = None) -> Optional[Path]:
    """Locate the git repository root at or above *start* (default: cwd).

    The starting directory is resolved, then it and each of its parents are
    checked, nearest first, for a ``.git`` entry.  Returns None if none has
    one.
    """
    origin = (start or Path.cwd()).resolve()
    candidates = (origin, *origin.parents)
    return next((folder for folder in candidates if (folder / ".git").exists()), None)
106
+
107
+
108
def make_temp_file(suffix: str = "", content: str = "") -> Path:
    """Create a named temp file with the given content, return its Path.

    Args:
        suffix: File name suffix passed to tempfile.mkstemp (e.g. ".sv").
        content: Text written to the file, encoded as UTF-8.

    Returns:
        Path of the new file.  The caller is responsible for deleting it.

    Raises:
        Whatever the write raised (e.g. OSError, UnicodeEncodeError).  In
        that case the partially written temp file is removed first.
    """
    import os

    fd, name = tempfile.mkstemp(suffix=suffix)
    try:
        # The file object owns fd from here on: leaving the with block closes
        # it on success and on failure alike, so fd must not be closed again.
        with open(fd, "w", encoding="utf-8") as f:
            f.write(content)
    except BaseException:
        # Do not leave a half-written file behind.  Cleanup is best-effort so
        # that the original error is the one the caller sees.
        try:
            os.unlink(name)
        except OSError:
            pass
        raise
    return Path(name)
120
+
121
+
122
+ # Local Variables:
123
+ # eval: (blacken-mode)
124
+ # End: