llm-commit-helper 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- llm_commit_helper/__init__.py +7 -0
- llm_commit_helper/__main__.py +10 -0
- llm_commit_helper/cli.py +214 -0
- llm_commit_helper/config.py +132 -0
- llm_commit_helper/diff_engine.py +91 -0
- llm_commit_helper/formatters/__init__.py +35 -0
- llm_commit_helper/formatters/generic_fmt.py +47 -0
- llm_commit_helper/formatters/python_fmt.py +81 -0
- llm_commit_helper/formatters/verilog_fmt.py +116 -0
- llm_commit_helper/git_staged.py +244 -0
- llm_commit_helper/output.py +83 -0
- llm_commit_helper/submodule.py +96 -0
- llm_commit_helper/utils.py +124 -0
- llm_commit_helper-0.1.0.dist-info/METADATA +287 -0
- llm_commit_helper-0.1.0.dist-info/RECORD +18 -0
- llm_commit_helper-0.1.0.dist-info/WHEEL +5 -0
- llm_commit_helper-0.1.0.dist-info/entry_points.txt +2 -0
- llm_commit_helper-0.1.0.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,116 @@
|
|
|
1
|
+
"""Verilog formatter: delete AUTO-generated sections before diffing."""
|
|
2
|
+
|
|
3
|
+
import re
|
|
4
|
+
import sys
|
|
5
|
+
from typing import Optional
|
|
6
|
+
|
|
7
|
+
from llm_commit_helper.utils import make_temp_file, run_command
|
|
8
|
+
from llm_commit_helper.diff_engine import annotate_formatting_hunks
|
|
9
|
+
from llm_commit_helper.formatters.generic_fmt import _hunk_is_formatting_only
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
# Regex matching verilog-mode AUTO keywords as whole words.  A hit means the
# file may contain machine-expanded sections that should be stripped before
# diffing.  The list follows the verilog-mode AUTO set; the template keyword is
# spelled AUTO_TEMPLATE there, and the historical AUTOTEMPLATE spelling is kept
# only for backward compatibility.  The single capturing group holds the
# keyword that matched.
AUTO_PATTERN = re.compile(
    r"\b("
    r"AUTOARG|AUTOASCIIENUM|AUTOASSIGNMODPORT"
    r"|AUTOINOUT|AUTOINOUTCOMP|AUTOINOUTIN|AUTOINOUTMODPORT"
    r"|AUTOINOUTMODULE|AUTOINOUTPARAM"
    r"|AUTOINPUT|AUTOINPUTREG|AUTOINSERTLAST|AUTOINSERTLISP"
    r"|AUTOINST|AUTOINSTPARAM|AUTOLOGIC"
    r"|AUTOOUTPUT|AUTOOUTPUTEVERY"
    r"|AUTOREG|AUTOREGINPUT|AUTORESET|AUTOSENSE"
    r"|AUTOTIEOFF|AUTOUNDEF|AUTOUNUSED|AUTOWIRE"
    r"|AUTO_TEMPLATE|AUTOTEMPLATE|AUTO_LISP"
    r")\b"
)
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
def _has_auto_macros(content: str) -> bool:
    """Tell whether *content* mentions any keyword matched by AUTO_PATTERN."""
    return AUTO_PATTERN.search(content) is not None
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
def _run_emacs_delete_auto(file_path: str) -> bool:
    """Run emacs in batch mode to delete AUTO-generated sections from a file.

    Emacs is started with ``--batch`` on *file_path* and asked to call
    ``verilog-batch-delete-auto`` followed by ``save-buffer``, so the file is
    rewritten in place.  The intent is that only hand-written source remains,
    which makes later diffs independent of AUTO expansion ordering.

    Returns True when the command exits with status 0, False otherwise
    (including when emacs cannot be started or times out, which
    ``run_command`` reports as a non-zero status).
    """
    emacs_cmd = [
        "emacs",
        "--batch",
        file_path,
        "-f",
        "verilog-batch-delete-auto",
        "-f",
        "save-buffer",
    ]
    exit_code, _stdout, _stderr = run_command(emacs_cmd)
    return exit_code == 0
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
def _strip_auto_sections(
    path: str, old_text: str, new_text: str
) -> Optional[tuple[str, str]]:
    """Return (old, new) with AUTO sections deleted, or None if emacs failed.

    Each version is written to its own temporary file (suffix ``.sv`` or
    ``.v`` depending on *path*), processed by ``_run_emacs_delete_auto`` and
    read back.  Processing stops at the first failure so a missing emacs is
    not retried for the second file.  Temporary files are always removed.
    """
    ext = ".sv" if path.endswith(".sv") else ".v"
    temp_files = []
    try:
        stripped = []
        for text in (old_text, new_text):
            tmp = make_temp_file(suffix=ext, content=text)
            temp_files.append(tmp)
            if not _run_emacs_delete_auto(str(tmp)):
                return None
            stripped.append(tmp.read_text(encoding="utf-8"))
        return stripped[0], stripped[1]
    finally:
        for tmp in temp_files:
            # missing_ok avoids the exists()/unlink() race of a manual check.
            tmp.unlink(missing_ok=True)


def format_verilog_diff(
    path: str,
    old_content: Optional[str],
    new_content: Optional[str],
) -> tuple[str, bool]:
    """Format a Verilog diff after deleting AUTO-generated sections.

    When either version mentions an AUTO keyword, both versions are passed
    through emacs ``verilog-batch-delete-auto`` so that only hand-written
    source is compared; this avoids spurious ordering differences in
    generated code.  When no AUTO keyword is present the contents are diffed
    as-is and no temporary files or emacs processes are created.

    Args:
        path: Path of the file being diffed; used for the temp-file suffix
            and in diagnostics.
        old_content: Previous content, or None (treated as empty).
        new_content: New content, or None (treated as empty).

    Returns:
        ``(diff_text, is_formatting_only)``.  If emacs is unavailable or
        fails, a message is printed to stderr and the result of
        ``format_generic_diff`` is returned instead.
    """
    old_text = old_content or ""
    new_text = new_content or ""
    has_auto = _has_auto_macros(old_text + new_text)

    if has_auto:
        stripped = _strip_auto_sections(path, old_text, new_text)
        if stripped is None:
            print(
                f"[llm-commit-helper] emacs not available or failed for {path}, falling back to generic",
                file=sys.stderr,
            )
            from llm_commit_helper.formatters.generic_fmt import format_generic_diff

            return format_generic_diff(path, old_content, new_content)
        old_text, new_text = stripped

    diff_lines, all_formatting = annotate_formatting_hunks(
        old_text.splitlines(keepends=True),
        new_text.splitlines(keepends=True),
        _hunk_is_formatting_only,
    )

    if not diff_lines:
        # Nothing left to show: with AUTO macros present that means every
        # change lived inside generated sections.
        if has_auto:
            return "[all changes are AUTO-generated - no user code changes]", True
        return "", True

    diff_text = "".join(diff_lines)
    if has_auto and all_formatting:
        diff_text = f"[formatting-only after AUTO deletion]\n{diff_text}"

    return diff_text, all_formatting
|
|
112
|
+
|
|
113
|
+
|
|
114
|
+
# Local Variables:
|
|
115
|
+
# eval: (blacken-mode)
|
|
116
|
+
# End:
|
|
@@ -0,0 +1,244 @@
|
|
|
1
|
+
"""Git interaction: staged file listing, classification, content retrieval."""
|
|
2
|
+
|
|
3
|
+
import sys
|
|
4
|
+
from dataclasses import dataclass
|
|
5
|
+
from enum import Enum
|
|
6
|
+
from pathlib import Path
|
|
7
|
+
from typing import Optional
|
|
8
|
+
|
|
9
|
+
from llm_commit_helper.config import Config
|
|
10
|
+
from llm_commit_helper.utils import find_git_root, glob_match, run_command
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
class FileStatus(Enum):
    """Change status of a staged path."""

    ADDED = "added"
    MODIFIED = "modified"
    DELETED = "deleted"
    RENAMED = "renamed"
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
class FileKind(Enum):
    """Classification that decides how a staged file is presented."""

    # Path matched one of the configured exclude patterns.
    EXCLUDED = "excluded"
    # Staged size exceeds the configured max_file_size.
    TOO_LARGE = "too_large"
    # New file - content is not shown.
    ADDED = "added"
    # Deleted file.
    DELETED = "deleted"
    # Gitlink / submodule entry.
    SUBMODULE = "submodule"
    # Regular diff.
    MODIFIED = "modified"
    # Binary file.
    BINARY = "binary"
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
@dataclass
class StagedFile:
    """One staged path together with everything gathered about it so far."""

    # Path relative to the git root.
    path: str
    status: FileStatus
    is_submodule: bool = False
    # For submodules: previous and new commit hashes.
    old_hash: Optional[str] = None
    new_hash: Optional[str] = None
    # Set by classify_file().
    kind: Optional[FileKind] = None
    # Text of the old and new versions, when they have been loaded.
    old_content: Optional[str] = None
    new_content: Optional[str] = None
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
def _parse_name_status(output: str) -> list[tuple[str, str, Optional[str]]]:
|
|
43
|
+
"""Parse git diff --name-status output into (status_char, path, old_path) tuples."""
|
|
44
|
+
results = []
|
|
45
|
+
for line in output.splitlines():
|
|
46
|
+
if not line.strip():
|
|
47
|
+
continue
|
|
48
|
+
parts = line.split("\t")
|
|
49
|
+
status_char = parts[0][0] # A, M, D, R, C, etc.
|
|
50
|
+
if status_char in ("R", "C") and len(parts) >= 3:
|
|
51
|
+
# Renamed: R100\told_path\tnew_path
|
|
52
|
+
results.append((status_char, parts[2], parts[1]))
|
|
53
|
+
elif len(parts) >= 2:
|
|
54
|
+
results.append((status_char, parts[1], None))
|
|
55
|
+
return results
|
|
56
|
+
|
|
57
|
+
|
|
58
|
+
def _get_submodule_hashes(git_root: Path) -> dict[str, tuple[str, str]]:
    """Return ``{path: (old_hash, new_hash)}`` for staged gitlink changes.

    Parses ``git diff --staged --raw``, whose lines look like
    ``:old_mode new_mode old_hash new_hash status<TAB>path`` (rename/copy
    lines carry a second, destination path).  An entry is recorded when
    either mode is the gitlink mode ``160000``.  For lines with two paths the
    destination path is used as the key, matching ``_parse_name_status``.

    Returns an empty dict when the git command fails.
    """
    gitlink_mode = "160000"
    rc, out, _ = run_command(
        ["git", "diff", "--staged", "--raw"],
        cwd=git_root,
    )
    hashes: dict[str, tuple[str, str]] = {}
    if rc != 0:
        return hashes

    for line in out.splitlines():
        if not line.startswith(":"):
            continue
        meta_field, *paths = line.split("\t")
        meta = meta_field.split()
        if not paths or len(meta) < 5:
            continue
        # The first field carries the leading ':' of the raw format.
        old_mode = meta[0].lstrip(":")
        new_mode, old_hash, new_hash = meta[1], meta[2], meta[3]
        if gitlink_mode in (old_mode, new_mode):
            hashes[paths[-1]] = (old_hash, new_hash)

    return hashes
|
|
89
|
+
|
|
90
|
+
|
|
91
|
+
def _get_submodule_set(git_root: Path) -> set[str]:
    """Return the set of submodule paths registered in ``.gitmodules``.

    Queries ``git config --file .gitmodules`` for keys of the form
    ``submodule.<name>.path`` only, so other keys that merely contain the
    word "path" (for example in a submodule name) are not picked up.  Each
    output line is ``<key> <value>``; everything after the first space is the
    path, which keeps paths containing spaces intact.

    Returns an empty set when the command fails (git also exits non-zero
    when no key matches).
    """
    rc, out, _ = run_command(
        [
            "git",
            "config",
            "--file",
            ".gitmodules",
            "--get-regexp",
            r"^submodule\..*\.path$",
        ],
        cwd=git_root,
    )
    paths: set[str] = set()
    if rc != 0:
        return paths
    for line in out.splitlines():
        _key, _sep, value = line.partition(" ")
        if value:
            paths.add(value)
    return paths
|
|
105
|
+
|
|
106
|
+
|
|
107
|
+
def get_staged_files(git_root: Optional[Path] = None) -> list[StagedFile]:
    """Return a StagedFile for every currently staged change.

    Args:
        git_root: Repository root; located with ``find_git_root()`` when
            omitted.

    Returns:
        One StagedFile per line of ``git diff --staged --name-status``.
        A path is flagged as a submodule when it is registered in
        ``.gitmodules`` or when its staged entry uses the gitlink mode (so a
        submodule whose ``.gitmodules`` entry is removed in the same commit is
        still recognized).  Returns an empty list, after printing a message
        to stderr, when no repository is found or git fails.
    """
    if git_root is None:
        git_root = find_git_root()
    if git_root is None:
        print("[llm-commit-helper] Error: not inside a git repository", file=sys.stderr)
        return []

    rc, out, err = run_command(
        ["git", "diff", "--staged", "--name-status"],
        cwd=git_root,
    )
    if rc != 0:
        print(f"[llm-commit-helper] git diff failed: {err}", file=sys.stderr)
        return []

    submodule_paths = _get_submodule_set(git_root)
    submodule_hashes = _get_submodule_hashes(git_root)

    # Copies are reported as modifications; any other unknown status letter
    # also falls back to MODIFIED below.
    status_map = {
        "A": FileStatus.ADDED,
        "M": FileStatus.MODIFIED,
        "D": FileStatus.DELETED,
        "R": FileStatus.RENAMED,
        "C": FileStatus.MODIFIED,
    }

    files = []
    for status_char, path, _old_path in _parse_name_status(out):
        old_h, new_h = submodule_hashes.get(path, (None, None))
        files.append(
            StagedFile(
                path=path,
                status=status_map.get(status_char, FileStatus.MODIFIED),
                is_submodule=path in submodule_paths or path in submodule_hashes,
                old_hash=old_h,
                new_hash=new_h,
            )
        )

    return files
|
|
151
|
+
|
|
152
|
+
|
|
153
|
+
def _file_size_in_index(path: str, git_root: Path) -> int:
    """Report the byte size of the staged (index) version of *path*.

    Uses ``git cat-file -s :0:<path>``.  Any failure - a non-zero exit or
    output that is not an integer - yields 0.
    """
    exit_code, stdout, _ = run_command(
        ["git", "cat-file", "-s", f":0:{path}"],
        cwd=git_root,
    )
    if exit_code != 0:
        return 0
    try:
        size = int(stdout.strip())
    except ValueError:
        size = 0
    return size
|
|
165
|
+
|
|
166
|
+
|
|
167
|
+
def _is_binary(path: str, git_root: Path) -> bool:
    """Heuristically decide whether the staged change to *path* is binary.

    ``git diff --staged --numstat`` prints ``-<TAB>-`` in place of line
    counts for binary files; that prefix on successful output is the signal.
    """
    exit_code, stdout, _ = run_command(
        ["git", "diff", "--staged", "--numstat", "--", path],
        cwd=git_root,
    )
    return exit_code == 0 and stdout.startswith("-\t-")
|
|
176
|
+
|
|
177
|
+
|
|
178
|
+
def classify_file(f: StagedFile, config: Config, git_root: Path) -> FileKind:
    """Pick the FileKind for a staged file according to the config rules.

    Checks run in priority order: submodule, exclude patterns, added,
    deleted, staged size above ``config.max_file_size``, binary; anything
    left is a regular modification.
    """
    # Submodules win over every other rule.
    if f.is_submodule:
        return FileKind.SUBMODULE

    if any(glob_match(rule, f.path) for rule in config.exclude_patterns):
        return FileKind.EXCLUDED

    # Added and deleted files map straight onto their kind (no content shown
    # for additions).
    by_status = {
        FileStatus.ADDED: FileKind.ADDED,
        FileStatus.DELETED: FileKind.DELETED,
    }
    if f.status in by_status:
        return by_status[f.status]

    # Size of the staged version.
    if _file_size_in_index(f.path, git_root) > config.max_file_size:
        return FileKind.TOO_LARGE

    if _is_binary(f.path, git_root):
        return FileKind.BINARY

    return FileKind.MODIFIED
|
|
207
|
+
|
|
208
|
+
|
|
209
|
+
def get_file_content(path: str, git_root: Path, staged: bool = True) -> Optional[str]:
    """Fetch a file's text from git via ``git show``.

    With ``staged=True`` the index version (``:0:<path>``) is read, otherwise
    the committed version (``HEAD:<path>``).  Returns None when git exits
    with a non-zero status.
    """
    ref = f":0:{path}" if staged else f"HEAD:{path}"
    exit_code, stdout, _ = run_command(["git", "show", ref], cwd=git_root)
    return stdout if exit_code == 0 else None
|
|
220
|
+
|
|
221
|
+
|
|
222
|
+
def load_file_contents(f: StagedFile, git_root: Path) -> StagedFile:
    """Return a copy of *f* with old/new content filled in.

    Only MODIFIED and DELETED files are touched; any other kind is returned
    unchanged (the same object).  The old content comes from HEAD; the new
    content comes from the index and is only fetched for MODIFIED files.
    """
    wants_new = f.kind == FileKind.MODIFIED
    if not wants_new and f.kind != FileKind.DELETED:
        return f

    head_text = get_file_content(f.path, git_root, staged=False)
    index_text = get_file_content(f.path, git_root, staged=True) if wants_new else None

    return StagedFile(
        path=f.path,
        status=f.status,
        is_submodule=f.is_submodule,
        old_hash=f.old_hash,
        new_hash=f.new_hash,
        kind=f.kind,
        old_content=head_text,
        new_content=index_text,
    )
|
|
240
|
+
|
|
241
|
+
|
|
242
|
+
# Local Variables:
|
|
243
|
+
# eval: (blacken-mode)
|
|
244
|
+
# End:
|
|
@@ -0,0 +1,83 @@
|
|
|
1
|
+
"""Output assembly: size-budgeted output builder."""
|
|
2
|
+
|
|
3
|
+
from dataclasses import dataclass, field
|
|
4
|
+
from typing import Optional
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
@dataclass
class FileSummary:
    """Rendered summary of one file section."""

    path: str
    # Section label, e.g. "modified", "added", "excluded", ...
    label: str
    # Body text for this file section.
    content: str
    truncated: bool = False
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
class OutputBuilder:
    """Collects output sections without exceeding a total character budget.

    The budget applies to the sections only; the header and footer passed to
    ``build`` are not counted against it.  Once one section fails to fit,
    every later section is rejected as well.
    """

    def __init__(self, max_total_size: int) -> None:
        self._max = max_total_size
        self._sections: list[str] = []
        self._current_size: int = 0
        self._truncated: bool = False
        self._truncated_files: list[str] = []

    def _remaining(self) -> int:
        """Characters still available in the budget."""
        return self._max - self._current_size

    def add_section(self, text: str, file_path: Optional[str] = None) -> bool:
        """Try to append *text*; return True if stored, False if rejected.

        A rejected section switches the builder into truncated mode and, when
        *file_path* is given, records the path for the "remaining files"
        list.
        """
        size = len(text)
        fits = not self._truncated and self._current_size + size <= self._max
        if fits:
            self._sections.append(text)
            self._current_size += size
            return True

        self._truncated = True
        if file_path:
            self._truncated_files.append(file_path)
        return False

    def build(self, header: str, footer_template: str) -> str:
        """Assemble header, sections, truncation notice and footer.

        *footer_template* is formatted with ``total_chars``, the length of
        everything that precedes the footer.
        """
        pieces = [header, *self._sections]

        if self._truncated:
            pieces.append("\n[OUTPUT TRUNCATED - budget exceeded]\n")
            if self._truncated_files:
                pieces.append("Remaining files (not shown):\n")
                pieces.extend(f" {name}\n" for name in self._truncated_files)

        body = "".join(pieces)
        return body + footer_template.format(total_chars=len(body))

    @property
    def is_truncated(self) -> bool:
        return self._truncated

    @property
    def current_size(self) -> int:
        return self._current_size
|
|
70
|
+
|
|
71
|
+
|
|
72
|
+
def format_file_header(path: str, label: str, extra: str = "") -> str:
    """Build the one-line header that opens a file section.

    The result is ``--- File: <path> [<label>] ---`` followed by a newline;
    a non-empty *extra* adds a second bracketed tag after the label.
    """
    extra_tag = f" [{extra}]" if extra else ""
    return f"--- File: {path} [{label}]{extra_tag} ---\n"
|
|
79
|
+
|
|
80
|
+
|
|
81
|
+
# Local Variables:
|
|
82
|
+
# eval: (blacken-mode)
|
|
83
|
+
# End:
|
|
@@ -0,0 +1,96 @@
|
|
|
1
|
+
"""Submodule log: fetch commit log between old and new hashes."""
|
|
2
|
+
|
|
3
|
+
import sys
|
|
4
|
+
from pathlib import Path
|
|
5
|
+
from typing import Optional
|
|
6
|
+
|
|
7
|
+
from llm_commit_helper.utils import run_command
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
_NULL_HASH = set(["0" * 7, "0" * 8, "0" * 40])
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
def _is_null_hash(h: str) -> bool:
|
|
14
|
+
"""Return True if hash is the all-zeros null hash (new submodule addition)."""
|
|
15
|
+
return not h.strip("0")
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
def get_submodule_log(
    submodule_path: str,
    old_hash: Optional[str],
    new_hash: Optional[str],
    git_root: Path,
) -> list[str]:
    """Return one-line log entries describing a submodule update.

    For a newly added submodule (old hash is all zeros) the last five commits
    reachable from *new_hash* are listed; otherwise the ``old..new`` range is
    listed.  The log is read by running git inside the submodule directory.

    Returns an empty list when a hash is missing, when the submodule was
    removed (new hash is all zeros, so there is nothing to log), when the
    submodule directory does not exist, or when git fails.  The last two
    cases also print a diagnostic to stderr.
    """
    if not old_hash or not new_hash:
        return []

    if _is_null_hash(new_hash):
        # Removed submodule: "old..0000000" is not a valid range, so skip the
        # git call (and its error message) altogether.
        return []

    abs_path = git_root / submodule_path
    if not abs_path.is_dir():
        print(
            f"[llm-commit-helper] Submodule not initialized: {submodule_path}",
            file=sys.stderr,
        )
        return []

    if _is_null_hash(old_hash):
        # Newly added submodule - no old side to diff against, so show the
        # five most recent commits at the new hash.
        log_args = ["git", "log", "--oneline", "-5", new_hash]
    else:
        log_args = ["git", "log", "--oneline", f"{old_hash}..{new_hash}"]

    rc, out, err = run_command(log_args, cwd=abs_path)
    if rc != 0:
        print(
            f"[llm-commit-helper] Could not get submodule log for {submodule_path}: {err.strip()}",
            file=sys.stderr,
        )
        return []

    return [line for line in out.splitlines() if line.strip()]
|
|
62
|
+
|
|
63
|
+
|
|
64
|
+
def format_submodule_section(
    submodule_path: str,
    old_hash: Optional[str],
    new_hash: Optional[str],
    log_lines: list[str],
) -> str:
    """Render the output section for one submodule change.

    The section has a title line, an "Added at" line (old hash missing or all
    zeros) or an "Updated" line showing hashes shortened to eight characters,
    and then either the indented log entries or a "no log available" note.
    Lines are joined with newlines; there is no trailing newline.
    """
    new_short = (new_hash or "?")[:8]
    section = [f"--- Submodule: {submodule_path} ---"]

    if not old_hash or _is_null_hash(old_hash):
        section.append(f"Added at: {new_short}")
    else:
        section.append(f"Updated: {old_hash[:8]} -> {new_short}")

    if log_lines:
        section.extend(f" {entry}" for entry in log_lines)
    else:
        section.append(" [no log available]")
    return "\n".join(section)
|
|
92
|
+
|
|
93
|
+
|
|
94
|
+
# Local Variables:
|
|
95
|
+
# eval: (blacken-mode)
|
|
96
|
+
# End:
|
|
@@ -0,0 +1,124 @@
|
|
|
1
|
+
"""Utility functions: subprocess, size parsing, glob matching, git root."""
|
|
2
|
+
|
|
3
|
+
import fnmatch
|
|
4
|
+
import subprocess
|
|
5
|
+
import sys
|
|
6
|
+
import tempfile
|
|
7
|
+
from pathlib import Path
|
|
8
|
+
from typing import Optional
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
# Upper bound, in seconds, on any subprocess started through run_command().
COMMAND_TIMEOUT = 30
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
def run_command(
    args: list[str],
    cwd: Optional[Path] = None,
    check: bool = False,
) -> tuple[int, str, str]:
    """Run a subprocess and return ``(returncode, stdout, stderr)``.

    Output is decoded as UTF-8 with undecodable bytes replaced, so binary or
    oddly encoded output (for example ``git show`` on a binary blob) cannot
    abort the caller with a UnicodeDecodeError.

    Args:
        args: Command and arguments; run without a shell.
        cwd: Working directory for the command, or None for the current one.
        check: When True, a non-zero exit raises
            ``subprocess.CalledProcessError``; otherwise it is only reported
            through the return code.

    Returns:
        The process's exit status and captured text.  A timeout (after
        ``COMMAND_TIMEOUT`` seconds) yields ``(1, "", "timeout")`` plus a
        message on stderr; a FileNotFoundError when starting the process
        yields ``(1, "", "command not found: <args[0]>")``.
    """
    try:
        result = subprocess.run(
            args,
            cwd=cwd,
            capture_output=True,
            text=True,
            encoding="utf-8",
            errors="replace",
            timeout=COMMAND_TIMEOUT,
            check=check,
        )
    except subprocess.TimeoutExpired:
        print(f"[llm-commit-helper] Timeout running: {' '.join(args)}", file=sys.stderr)
        return 1, "", "timeout"
    except FileNotFoundError:
        return 1, "", f"command not found: {args[0]}"
    return result.returncode, result.stdout, result.stderr
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
def parse_size(value: str | int | float) -> int:
|
|
42
|
+
"""Parse a human-readable size string into bytes.
|
|
43
|
+
|
|
44
|
+
Accepts: 200MB, 20KB, 4096, 1.5GB, etc.
|
|
45
|
+
Returns the size in bytes as an integer.
|
|
46
|
+
"""
|
|
47
|
+
if isinstance(value, (int, float)):
|
|
48
|
+
return int(value)
|
|
49
|
+
|
|
50
|
+
s = str(value).strip().upper()
|
|
51
|
+
suffixes = {
|
|
52
|
+
"GB": 1024**3,
|
|
53
|
+
"MB": 1024**2,
|
|
54
|
+
"KB": 1024,
|
|
55
|
+
"B": 1,
|
|
56
|
+
}
|
|
57
|
+
for suffix, multiplier in suffixes.items():
|
|
58
|
+
if s.endswith(suffix):
|
|
59
|
+
number = s[: -len(suffix)].strip()
|
|
60
|
+
return int(float(number) * multiplier)
|
|
61
|
+
return int(float(s))
|
|
62
|
+
|
|
63
|
+
|
|
64
|
+
def glob_match(pattern: str, path: str) -> bool:
    """Return True if *path* matches the glob *pattern*.

    Backslashes in both arguments are treated as ``/``.  Matching is tried in
    three steps:

    1. ``fnmatch`` of the whole path against the pattern (``*`` may span
       directory separators).
    2. ``fnmatch`` of just the file name against the pattern.
    3. For a pattern with exactly one ``**``, split as ``prefix/**/suffix``:
       the path must equal *prefix* or lie beneath it, and the remainder
       must match *suffix* either directly or below further directories.
       This covers the "zero directories" case that step 1 misses (for
       example ``**/foo.py`` against ``foo.py``) while respecting path
       segment boundaries, so ``src/**`` does not match ``srcx/a`` and
       ``**/foo.py`` does not match ``a/barfoo.py``.
    """
    path = path.replace("\\", "/")
    pattern = pattern.replace("\\", "/")

    if fnmatch.fnmatch(path, pattern):
        return True
    if fnmatch.fnmatch(Path(path).name, pattern):
        return True

    if "**" not in pattern:
        return False
    pieces = pattern.split("**")
    if len(pieces) != 2:
        return False

    prefix = pieces[0].rstrip("/")
    suffix = pieces[1].lstrip("/")
    if not prefix and not suffix:
        return False

    if not prefix:
        rest = path
    elif path == prefix:
        rest = ""
    elif path.startswith(prefix + "/"):
        # Only accept the prefix as a complete leading directory.
        rest = path[len(prefix) + 1 :]
    else:
        return False

    if not suffix:
        return True
    return fnmatch.fnmatch(rest, suffix) or fnmatch.fnmatch(rest, f"*/{suffix}")
|
|
97
|
+
|
|
98
|
+
|
|
99
|
+
def find_git_root(start: Optional[Path] = None) -> Optional[Path]:
    """Locate the nearest enclosing directory that contains a ``.git`` entry.

    The search begins at the resolved *start* (or the current working
    directory) and moves upward through its parents.  Returns that directory,
    or None when no ancestor has a ``.git`` entry.
    """
    origin = (start if start else Path.cwd()).resolve()
    candidates = (origin, *origin.parents)
    return next(
        (folder for folder in candidates if (folder / ".git").exists()),
        None,
    )
|
|
106
|
+
|
|
107
|
+
|
|
108
|
+
def make_temp_file(suffix: str = "", content: str = "") -> Path:
    """Create a temporary file holding *content* and return its Path.

    The file is created with ``tempfile.mkstemp`` and written as UTF-8.  The
    caller owns the file and is responsible for deleting it.  If writing
    fails, the partially written file is removed and the original exception
    propagates.
    """
    fd, name = tempfile.mkstemp(suffix=suffix)
    tmp_path = Path(name)
    try:
        # open() takes ownership of the descriptor and closes it when the
        # with-block exits, so it must not be closed a second time here.
        with open(fd, "w", encoding="utf-8") as handle:
            handle.write(content)
    except BaseException:
        # Do not leave a half-written temp file behind; always re-raise.
        tmp_path.unlink(missing_ok=True)
        raise
    return tmp_path
|
|
120
|
+
|
|
121
|
+
|
|
122
|
+
# Local Variables:
|
|
123
|
+
# eval: (blacken-mode)
|
|
124
|
+
# End:
|