diffstory 0.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- diffstory/__init__.py +3 -0
- diffstory/__main__.py +5 -0
- diffstory/cli.py +402 -0
- diffstory/diff_parser.py +298 -0
- diffstory/git_utils.py +365 -0
- diffstory/html_generator.py +2343 -0
- diffstory/syntax.py +145 -0
- diffstory-0.2.0.dist-info/METADATA +207 -0
- diffstory-0.2.0.dist-info/RECORD +12 -0
- diffstory-0.2.0.dist-info/WHEEL +5 -0
- diffstory-0.2.0.dist-info/entry_points.txt +2 -0
- diffstory-0.2.0.dist-info/top_level.txt +1 -0
diffstory/__init__.py
ADDED
diffstory/__main__.py
ADDED
diffstory/cli.py
ADDED
|
@@ -0,0 +1,402 @@
|
|
|
1
|
+
"""CLI entry point for DiffStory — parse arguments, gather diffs, generate reports."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import argparse
|
|
6
|
+
import os
|
|
7
|
+
import sys
|
|
8
|
+
from pathlib import Path
|
|
9
|
+
from typing import Optional
|
|
10
|
+
|
|
11
|
+
from diffstory import __version__
|
|
12
|
+
from diffstory.diff_parser import parse_diff
|
|
13
|
+
from diffstory.git_utils import (
|
|
14
|
+
GitError,
|
|
15
|
+
check_git_repo,
|
|
16
|
+
get_diff,
|
|
17
|
+
get_diff_with_renames,
|
|
18
|
+
)
|
|
19
|
+
from diffstory.html_generator import generate_report
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
# ---------------------------------------------------------------------------
|
|
23
|
+
# Config file (.diffstory.toml) loader
|
|
24
|
+
# ---------------------------------------------------------------------------
|
|
25
|
+
DEFAULT_CONFIG_PATH = Path.home() / ".diffstory.toml"
LOCAL_CONFIG_PATH = Path.cwd() / ".diffstory.toml"


def _load_config() -> dict:
    """Load ``.diffstory.toml`` settings from the home and project directories.

    The home-directory file is read first and the file in the current working
    directory second, so project-local values override user-wide ones.
    Files that are missing, unreadable or not valid UTF-8 are skipped.

    Returns:
        A flat dict of settings (as produced by ``_parse_toml_like``) that
        serve as defaults for CLI flags; empty when no config file is usable.
    """
    config: dict = {}

    # Lowest precedence first: keys from later files overwrite earlier ones.
    for cfg_path in (DEFAULT_CONFIG_PATH, LOCAL_CONFIG_PATH):
        try:
            text = cfg_path.read_text(encoding="utf-8")
        except (OSError, ValueError):
            # Best effort: a missing or undecodable config file must never
            # stop the CLI from running (UnicodeDecodeError is a ValueError).
            continue
        config.update(_parse_toml_like(text))
    return config
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
def _parse_toml_like(text: str) -> dict:
    """Parse the small TOML subset used by ``.diffstory.toml``.

    Supported syntax: ``#`` comments (full-line, or trailing after a section
    header or value), ``[section]`` headers and ``key = value`` pairs.  Keys
    and section names are lower-cased; a key inside a section is returned as
    ``"section.key"``.

    Surrounding quotes are removed from values, then ``true/yes/on`` and
    ``false/no/off`` (any case) become booleans, integer literals become
    ``int`` and everything else stays a string.

    Args:
        text: Raw contents of the config file.

    Returns:
        A flat dict mapping (possibly section-prefixed) keys to values.
    """
    result: dict = {}
    current_section: Optional[str] = None
    for line in text.splitlines():
        stripped = line.strip()
        if not stripped or stripped.startswith("#"):
            continue

        if stripped.startswith("["):
            # Tolerate a trailing comment after the header: "[cli]  # options".
            header = stripped.partition("#")[0].rstrip()
            if header.endswith("]"):
                current_section = header[1:-1].strip().lower()
                continue

        if "=" not in stripped:
            continue

        key, _, raw = stripped.partition("=")
        key = key.strip().lower()
        raw = raw.strip()

        if raw[:1] in ('"', "'"):
            # Quoted value: keep everything up to the closing quote so a "#"
            # inside the quotes survives and a trailing comment is dropped.
            closing = raw.find(raw[0], 1)
            if closing != -1:
                raw = raw[: closing + 1]
        else:
            # Bare value: without this, "debug = false  # off" would be kept
            # as the truthy string "false  # off".
            raw = raw.partition("#")[0].strip()
        raw = raw.strip('"').strip("'")

        lowered = raw.lower()
        if lowered in ("true", "yes", "on"):
            value: object = True
        elif lowered in ("false", "no", "off"):
            value = False
        else:
            try:
                value = int(raw)
            except ValueError:
                value = raw

        full_key = f"{current_section}.{key}" if current_section else key
        result[full_key] = value
    return result
|
|
74
|
+
|
|
75
|
+
|
|
76
|
+
def build_parser() -> argparse.ArgumentParser:
    """Build the CLI argument parser.

    Returns:
        A parser for the ``diffstory`` command.  ``--verbose`` and ``--debug``
        default to ``None`` rather than ``False`` so a caller can tell
        "flag not given" apart from an explicit choice.
    """
    parser = argparse.ArgumentParser(
        prog="diffstory",
        description="Transform Git diffs into rich, interactive, self-contained HTML reports.",
        # Real newlines are required here: RawDescriptionHelpFormatter prints
        # the epilog verbatim, so an escaped "\\n" would show up as text.
        epilog=(
            "Examples:\n"
            " diffstory # working tree diff\n"
            " diffstory --staged # staged changes\n"
            " diffstory HEAD~3 HEAD # commit comparison\n"
            " diffstory main feature # branch comparison\n"
            " diffstory -o report.html # custom output file\n"
            " diffstory --json # JSON export\n"
            " diffstory HEAD~3 HEAD src/ # restrict to path"
        ),
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )

    parser.add_argument(
        "--version",
        action="version",
        version=f"diffstory {__version__}",
    )

    parser.add_argument(
        "--staged",
        action="store_true",
        help="Show staged changes (equivalent to git diff --cached)",
    )

    parser.add_argument(
        "revisions",
        nargs="*",
        metavar="REVISION",
        help="Optional commit range: REVISION [REVISION] [-- path]",
    )

    parser.add_argument(
        "-o",
        "--output",
        default="diffstory-report.html",
        metavar="FILE",
        help="Output file path (default: diffstory-report.html)",
    )

    parser.add_argument(
        "--json",
        action="store_true",
        help="Export diff data as JSON",
    )

    parser.add_argument(
        "--md",
        action="store_true",
        help="Export diff summary as Markdown",
    )

    parser.add_argument(
        "--csv",
        action="store_true",
        help="Export diff stats as CSV",
    )

    parser.add_argument(
        "--diff",
        metavar="FILE",
        help="Generate report from a diff file directly (no git repository needed)",
    )

    parser.add_argument(
        "--verbose", "-v",
        action="store_true",
        default=None,
        help="Show git commands and timing information",
    )

    parser.add_argument(
        "--debug",
        action="store_true",
        default=None,
        help="Show detailed debug output including stack traces",
    )

    return parser
|
|
160
|
+
|
|
161
|
+
|
|
162
|
+
def _parse_revisions(args: argparse.Namespace) -> tuple[Optional[str], Optional[str], Optional[list[str]]]:
    """Split the positional arguments into a commit range and path filters.

    Supports:
        []                                -> (None, None, None)
        [COMMIT]                          -> (COMMIT, None, None)
        [COMMIT_A, COMMIT_B]              -> (COMMIT_A, COMMIT_B, None)
        [COMMIT_A, COMMIT_B, path, ...]   -> (COMMIT_A, COMMIT_B, [path, ...])
        [COMMIT_A, COMMIT_B, --, path]    -> (COMMIT_A, COMMIT_B, [path])

    Args:
        args: Parsed CLI namespace; only ``args.revisions`` is read.

    Returns:
        ``(commit_a, commit_b, paths)``.  ``paths`` is ``None`` when no path
        restriction was given.
    """
    revisions = list(args.revisions or [])
    if not revisions:
        return None, None, None

    paths: Optional[list[str]] = None
    if "--" in revisions:
        sep_idx = revisions.index("--")
        # Take the paths BEFORE truncating the revision list; slicing the
        # truncated list (as was done previously) always produced [].
        paths = revisions[sep_idx + 1:]
        revisions = revisions[:sep_idx]
    elif len(revisions) > 2:
        # argparse normally consumes a literal "--" itself, so the separator
        # may never reach us.  Anything after the two revisions is a path,
        # matching the "diffstory HEAD~3 HEAD src/" example in --help.
        paths = revisions[2:]

    commit_a = revisions[0] if revisions else None
    commit_b = revisions[1] if len(revisions) >= 2 else None
    return commit_a, commit_b, paths
|
|
192
|
+
|
|
193
|
+
|
|
194
|
+
def generate_exports(
    files,
    output_path: str,
    json_export: bool = False,
    md_export: bool = False,
    csv_export: bool = False,
) -> None:
    """Write the requested non-HTML exports next to the HTML report.

    Each export reuses the directory and stem of ``output_path`` and swaps in
    its own extension (``.json``, ``.md``, ``.csv``).

    Args:
        files: Parsed diff files, passed through to the individual exporters.
        output_path: Path of the HTML report the exports are named after.
        json_export: Write the JSON export when true.
        md_export: Write the Markdown export when true.
        csv_export: Write the CSV export when true.
    """
    report = Path(output_path)
    stem = report.stem

    def _sibling(extension: str) -> Path:
        # Same directory and stem as the report, different extension.
        return report.with_name(stem + extension)

    if json_export:
        _export_json(files, _sibling(".json"))
    if md_export:
        _export_markdown(files, _sibling(".md"))
    if csv_export:
        _export_csv(files, _sibling(".csv"))
|
|
211
|
+
|
|
212
|
+
|
|
213
|
+
def _export_json(files, output_path: Path) -> None:
    """Serialize every file, hunk and line of the diff to a JSON document.

    Args:
        files: Iterable of diff-file objects exposing ``old_path``,
            ``new_path``, ``status`` and ``hunks``.
        output_path: Destination file; written as UTF-8 with 2-space indent.
    """
    import json

    def _line_record(entry) -> dict:
        return {
            "type": entry.line_type,
            "content": entry.content,
            "old_lineno": entry.old_lineno,
            "new_lineno": entry.new_lineno,
        }

    def _hunk_record(hunk) -> dict:
        return {
            "old_start": hunk.old_start,
            "old_count": hunk.old_count,
            "new_start": hunk.new_start,
            "new_count": hunk.new_count,
            "lines": [_line_record(entry) for entry in hunk.lines],
        }

    payload = [
        {
            "old_path": diff_file.old_path,
            "new_path": diff_file.new_path,
            "status": diff_file.status,
            "hunks": [_hunk_record(hunk) for hunk in diff_file.hunks],
        }
        for diff_file in files
    ]

    output_path.write_text(json.dumps(payload, indent=2), encoding="utf-8")
    print(f" JSON: {output_path}")
|
|
245
|
+
|
|
246
|
+
|
|
247
|
+
def _export_markdown(files, output_path: Path) -> None:
    """Write a Markdown summary of the diff: per-file stats plus hunk lines.

    Args:
        files: Iterable of diff-file objects exposing ``display_path``,
            ``status`` and ``hunks``.
        output_path: Destination file; written as UTF-8.

    Raises:
        KeyError: If a line has a type other than context/addition/deletion.
    """
    status_icons = {"added": "+", "deleted": "-", "renamed": "→", "modified": "~"}
    line_prefixes = {"context": " ", "addition": "+", "deletion": "-"}

    out = ["# DiffStory Report\n"]
    for diff_file in files:
        kinds = [entry.line_type for hunk in diff_file.hunks for entry in hunk.lines]
        adds = kinds.count("addition")
        dels = kinds.count("deletion")
        # Unknown statuses fall back to the "modified" marker.
        icon = status_icons.get(diff_file.status, "~")
        out += [
            f"## {icon} `{diff_file.display_path}`",
            f"- **Status:** {diff_file.status}",
            f"- **Additions:** {adds}",
            f"- **Deletions:** {dels}\n",
        ]
        for hunk in diff_file.hunks:
            out.append(f"### @@ {hunk.old_start},{hunk.old_count} {hunk.new_start},{hunk.new_count} @@")
            if hunk.header:
                out.append(f"_{hunk.header}_\n")
            out.extend(
                f" {line_prefixes[entry.line_type]} {entry.content}"
                for entry in hunk.lines
            )
            out.append("")

    output_path.write_text("\n".join(out), encoding="utf-8")
    print(f" Markdown: {output_path}")
|
|
269
|
+
|
|
270
|
+
|
|
271
|
+
def _export_csv(files, output_path: Path) -> None:
    """Write one CSV row of add/delete counts per changed file.

    Rows go through the ``csv`` module so that paths containing commas,
    quotes or newlines are quoted instead of corrupting the column layout.
    Output for ordinary paths is unchanged: ``\\n`` line endings and a header
    row of ``file,status,additions,deletions``.

    Args:
        files: Iterable of diff-file objects exposing ``display_path``,
            ``status`` and ``hunks``.
        output_path: Destination file; written as UTF-8.
    """
    import csv

    # newline="" hands line-ending control to the csv writer.
    with output_path.open("w", encoding="utf-8", newline="") as handle:
        writer = csv.writer(handle, lineterminator="\n")
        writer.writerow(["file", "status", "additions", "deletions"])
        for f in files:
            kinds = [entry.line_type for hunk in f.hunks for entry in hunk.lines]
            writer.writerow(
                [f.display_path, f.status, kinds.count("addition"), kinds.count("deletion")]
            )

    print(f" CSV: {output_path}")
|
|
281
|
+
|
|
282
|
+
|
|
283
|
+
def _read_diff_from_file(path: str) -> str:
    """Read diff text from ``path``, exiting the process on failure.

    The file is decoded as UTF-8 with undecodable bytes replaced by U+FFFD,
    so a diff that touches non-UTF-8 files can still be loaded.

    Args:
        path: Location of the diff file.

    Returns:
        The decoded file contents.

    Raises:
        SystemExit: With status 1 (after printing to stderr) when the file
            is missing or cannot be read.
    """
    try:
        return Path(path).read_text(encoding="utf-8", errors="replace")
    except FileNotFoundError:
        print(f"Error: Diff file not found: {path}", file=sys.stderr)
        sys.exit(1)
    except (OSError, ValueError) as e:
        # OSError: permissions, directories, I/O errors.
        # ValueError: malformed paths such as an embedded NUL byte.
        print(f"Error reading diff file: {e}", file=sys.stderr)
        sys.exit(1)
|
|
293
|
+
|
|
294
|
+
|
|
295
|
+
def _emit_reports(files, args: argparse.Namespace, verbose: bool, debug: bool, **report_kwargs) -> None:
    """Write the optional exports and the HTML report; exit with 1 on failure.

    Args:
        files: Parsed diff files.
        args: Parsed CLI namespace (``output``, ``json``, ``md``, ``csv``).
        verbose: Forwarded to ``generate_report``.
        debug: When true, print the traceback of a report failure.
        **report_kwargs: Extra keyword arguments for ``generate_report``.
    """
    if args.json or args.md or args.csv:
        generate_exports(files, args.output, args.json, args.md, args.csv)

    # The HTML report is always generated, with or without exports.
    try:
        report_path = generate_report(
            files,
            output_path=args.output,
            verbose=verbose,
            **report_kwargs,
        )
    except Exception as e:
        if debug:
            import traceback
            traceback.print_exc()
        print(f"Error generating report: {e}", file=sys.stderr)
        sys.exit(1)

    # A real newline: the escaped form "\\n" printed a literal backslash-n.
    print(f"\n HTML: {report_path}")
    print(" Report generated successfully!")


def main() -> None:
    """Main entry point for the diffstory CLI.

    Parses the command line, layers config-file defaults under the
    ``--verbose`` / ``--debug`` flags, obtains a diff (from ``--diff FILE``
    or from git), parses it and writes the requested outputs.  Exits with
    status 1 on errors and status 0 when there is nothing to report.
    """
    parser = build_parser()
    args = parser.parse_args()

    # A flag left at None was not given on the command line, so the config
    # file value (if any) applies.
    config = _load_config()
    verbose = args.verbose if args.verbose is not None else config.get("cli.verbose", False)
    debug = args.debug if args.debug is not None else config.get("cli.debug", False)

    if debug:
        verbose = True  # debug implies verbose

    # --diff: read the diff from a file, no git repository needed.
    if args.diff:
        if verbose:
            print(f" Reading diff file: {args.diff}")
        files = parse_diff(_read_diff_from_file(args.diff))
        if not files:
            print("No parseable diff files found.")
            sys.exit(0)
        _emit_reports(files, args, verbose, debug, repo_name="diff")
        return

    if not check_git_repo():
        print("Error: Not inside a Git repository.", file=sys.stderr)
        sys.exit(1)

    commit_a, commit_b, paths = _parse_revisions(args)

    if verbose:
        if commit_a and commit_b:
            rev_desc = f"{commit_a}..{commit_b}"
        elif commit_a:
            rev_desc = commit_a
        else:
            rev_desc = "staged" if args.staged else "working tree"
        print(f" Diff: {rev_desc}")

    try:
        diff_text = get_diff_with_renames(
            staged=args.staged,
            commit_a=commit_a,
            commit_b=commit_b,
            paths=paths,
        )
    except GitError as e:
        print(f"Error fetching diff: {e}", file=sys.stderr)
        sys.exit(1)

    if not diff_text.strip():
        print("No changes detected.")
        sys.exit(0)

    if verbose:
        print(f" Diff size: {len(diff_text)} bytes")

    files = parse_diff(diff_text)

    if not files:
        print("No parseable diff files found.")
        sys.exit(0)

    if verbose:
        print(f" Files changed: {len(files)}")

    _emit_reports(
        files,
        args,
        verbose,
        debug,
        staged=args.staged,
        commit_a=commit_a,
        commit_b=commit_b,
    )
|
diffstory/diff_parser.py
ADDED
|
@@ -0,0 +1,298 @@
|
|
|
1
|
+
"""Parse unified diff output into structured Python data."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import difflib
|
|
6
|
+
import re
|
|
7
|
+
from typing import Optional
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
# Image extensions that can be previewed inline
IMAGE_EXTENSIONS = frozenset({".png", ".jpg", ".jpeg", ".gif", ".svg", ".webp", ".bmp", ".ico"})


class DiffFile:
    """Represents a single file in a diff.

    Args:
        old_path: Path on the old side of the diff.
        new_path: Path on the new side; ``"/dev/null"`` makes ``display_path``
            fall back to ``old_path``.
        status: One of ``added``, ``deleted``, ``modified``, ``renamed``.
        old_mode: File mode string on the old side, if known.
        new_mode: File mode string on the new side, if known.
        similarity: Rename similarity percentage, if known.
    """

    def __init__(
        self,
        old_path: str,
        new_path: str,
        status: str = "modified",
        old_mode: Optional[str] = None,
        new_mode: Optional[str] = None,
        similarity: Optional[int] = None,
    ):
        self.old_path = old_path
        self.new_path = new_path
        # Path shown to the user: the new path unless it is /dev/null.
        self.display_path = new_path if new_path != "/dev/null" else old_path
        self.status = status  # added, deleted, modified, renamed
        self.old_mode = old_mode
        self.new_mode = new_mode
        self.similarity = similarity
        self.hunks: list[Hunk] = []
        # Explicit marker a parser may set when the diff declares the file
        # binary; honoured by ``is_binary``.
        self.is_binary_file = False

    @property
    def is_binary(self) -> bool:
        """Whether the file should be treated as binary (no textual hunks).

        An explicit ``is_binary_file`` flag wins.  Otherwise a file without
        hunks is assumed binary, except when it was added or is a pure
        rename (100% similarity), which has no hunks simply because the
        content did not change.
        """
        if self.is_binary_file:
            return True
        if self.hunks or self.status == "added":
            return False
        if self.status == "renamed" and self.similarity == 100:
            return False
        return True

    @property
    def is_image(self) -> bool:
        """Check if the file path looks like a known image type."""
        # Everything after the last "." counts as the extension, lower-cased.
        _, dot, extension = self.display_path.rpartition(".")
        return bool(dot) and f".{extension.lower()}" in IMAGE_EXTENSIONS
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
class Hunk:
    """One ``@@ ... @@`` section of a file diff.

    Stores the start line and line count for the old and new side, the
    optional text that followed the closing ``@@``, and the hunk's lines
    (initially empty).
    """

    def __init__(
        self,
        old_start: int,
        old_count: int,
        new_start: int,
        new_count: int,
        header: str = "",
    ):
        self.old_start, self.old_count = old_start, old_count
        self.new_start, self.new_count = new_start, new_count
        self.header = header
        # Each instance gets its own list; callers append DiffLine objects.
        self.lines: list[DiffLine] = []
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
class DiffLine:
    """One line of a hunk, tagged as context, addition or deletion.

    ``old_lineno`` / ``new_lineno`` carry the line number on each side of the
    diff and default to ``None``.  ``word_diff`` starts as ``None``.
    """

    TYPE_CONTEXT = "context"
    TYPE_ADDITION = "addition"
    TYPE_DELETION = "deletion"

    def __init__(
        self,
        line_type: str,
        content: str,
        old_lineno: Optional[int] = None,
        new_lineno: Optional[int] = None,
    ):
        self.line_type, self.content = line_type, content
        self.old_lineno, self.new_lineno = old_lineno, new_lineno
        # Filled in later when a word-level diff is computed for this line.
        self.word_diff: Optional[WordDiff] = None
|
|
85
|
+
|
|
86
|
+
|
|
87
|
+
class WordDiff:
    """Word-level diff of a line pair, used for inline edit mode.

    ``parts`` is a list of dicts shaped like
    ``{"text": "...", "type": "equal|add|delete"}`` and is stored as given.
    """

    def __init__(self, parts: list[dict]):
        self.parts = parts
|
|
92
|
+
|
|
93
|
+
|
|
94
|
+
def _apply_file_metadata(diff_file, line: str) -> None:
    """Fold one pre-hunk header line into ``diff_file``.

    Recognises rename, similarity, binary and file-mode lines.  Any other
    line (``index``, ``---``, ``+++`` and so on) is ignored.
    """
    if line.startswith("rename from "):
        diff_file.old_path = line[12:]
        diff_file.status = "renamed"
    elif line.startswith("rename to "):
        diff_file.new_path = line[10:]
        diff_file.display_path = diff_file.new_path
        diff_file.status = "renamed"
    elif line.startswith("similarity index "):
        try:
            diff_file.similarity = int(line[17:].rstrip("%"))
        except ValueError:
            pass  # malformed percentage: leave similarity unset
    elif line.startswith("Binary files ") or line == "GIT binary patch":
        # Record the binary marker instead of discarding it.
        diff_file.is_binary_file = True
    elif line.startswith("new file mode "):
        diff_file.status = "added"
        diff_file.new_mode = line[14:]
    elif line.startswith("deleted file mode "):
        diff_file.status = "deleted"
        diff_file.old_mode = line[18:]
    elif line.startswith("old mode "):
        diff_file.old_mode = line[9:]
    elif line.startswith("new mode "):
        diff_file.new_mode = line[9:]


def parse_diff(diff_text: str) -> list[DiffFile]:
    """Parse a unified diff string into a list of DiffFile objects.

    Only ``diff --git`` sections are recognised; text before the first such
    header is skipped.

    Header lines are interpreted only before a file's first hunk.  Once a
    hunk has started, every line is content, so a removed line whose text
    begins with ``-- `` (which appears as ``--- ...``) is no longer mistaken
    for a file header.  ``\\ No newline at end of file`` markers are dropped
    rather than counted as context lines.

    Args:
        diff_text: Output of ``git diff`` (or equivalent).

    Returns:
        One ``DiffFile`` per ``diff --git`` header, in input order.
    """
    files: list[DiffFile] = []
    current_file: Optional[DiffFile] = None
    current_hunk: Optional[Hunk] = None

    old_lineno = 0
    new_lineno = 0

    for line in diff_text.splitlines():
        if line.startswith("diff --git "):
            current_file = _parse_diff_header(line)
            files.append(current_file)
            current_hunk = None
            old_lineno = 0
            new_lineno = 0
            continue

        if current_file is None:
            continue

        if line.startswith("@@"):
            current_hunk = _parse_hunk_header(line)
            current_file.hunks.append(current_hunk)
            old_lineno = current_hunk.old_start
            new_lineno = current_hunk.new_start
            continue

        if current_hunk is None:
            # Still in the per-file header block.
            _apply_file_metadata(current_file, line)
            continue

        # Inside a hunk: the first character decides the line type.
        marker = line[:1]
        if marker == "\\":
            # Annotation about the previous line, not file content; counting
            # it would shift every following line number.
            continue
        if marker == "+":
            current_hunk.lines.append(
                DiffLine(DiffLine.TYPE_ADDITION, line[1:], new_lineno=new_lineno)
            )
            new_lineno += 1
        elif marker == "-":
            current_hunk.lines.append(
                DiffLine(DiffLine.TYPE_DELETION, line[1:], old_lineno=old_lineno)
            )
            old_lineno += 1
        else:
            # Context line (normally starts with a space; may be empty).
            current_hunk.lines.append(
                DiffLine(DiffLine.TYPE_CONTEXT, line[1:], old_lineno, new_lineno)
            )
            old_lineno += 1
            new_lineno += 1

    return files
|
|
198
|
+
|
|
199
|
+
|
|
200
|
+
def _parse_diff_header(line: str) -> DiffFile:
    """Parse a ``diff --git a/path b/path`` header into a ``DiffFile``.

    When both sides name the same path, the header has the exact shape
    ``a/<path> b/<path>``.  That shape is checked first by splitting at the
    midpoint, which stays correct for paths that themselves contain
    ``" b/"`` (for example a directory called ``"a b"``).  Otherwise the
    header is split at the first ``" b/"``; if there is none, the whole
    remainder is used for both paths.

    Args:
        line: A header line starting with ``"diff --git "``.

    Returns:
        A ``DiffFile`` with ``old_path`` and ``new_path`` filled in.
    """
    rest = line[11:]  # text after 'diff --git '

    # "a/" + path + " b/" + path  ->  len(rest) == 2 * len(path) + 5
    path_len, remainder = divmod(len(rest) - 5, 2)
    if remainder == 0 and path_len > 0:
        candidate = rest[2:2 + path_len]
        if rest == f"a/{candidate} b/{candidate}":
            return DiffFile(candidate, candidate)

    parts = rest.split(" b/", 1)
    if len(parts) == 2:
        old_path = parts[0][2:] if parts[0].startswith("a/") else parts[0]
        new_path = parts[1]
    else:
        old_path = rest
        new_path = rest
    return DiffFile(old_path, new_path)
|
|
212
|
+
|
|
213
|
+
|
|
214
|
+
HUNK_HEADER_RE = re.compile(r"^@@ -(\d+)(?:,(\d+))? \+(\d+)(?:,(\d+))? @@(.*)")


def _parse_hunk_header(line: str) -> Hunk:
    """Turn an ``@@ -old,count +new,count @@ text`` line into a ``Hunk``.

    A missing count defaults to 1.  The text after the closing ``@@`` is
    stripped and stored as the hunk header.  A line that does not match
    yields ``Hunk(0, 0, 0, 0)``.
    """
    parsed = HUNK_HEADER_RE.match(line)
    if not parsed:
        return Hunk(0, 0, 0, 0)

    old_start, old_count, new_start, new_count, trailer = parsed.groups()
    return Hunk(
        int(old_start),
        int(old_count or 1),
        int(new_start),
        int(new_count or 1),
        trailer.strip(),
    )
|
|
228
|
+
|
|
229
|
+
|
|
230
|
+
def compute_word_diff(old_line: str, new_line: str) -> WordDiff:
    """Build the word-level diff between an old and a new line.

    Both lines are tokenized with ``_tokenize`` and aligned with
    ``difflib.SequenceMatcher``.  Unchanged tokens are emitted as ``equal``,
    tokens only in the old line as ``delete`` and tokens only in the new line
    as ``add``.  For a replaced span the deleted tokens come first.
    """
    old_tokens = _tokenize(old_line)
    new_tokens = _tokenize(new_line)
    opcodes = difflib.SequenceMatcher(None, old_tokens, new_tokens).get_opcodes()

    parts: list[dict] = []
    for tag, old_lo, old_hi, new_lo, new_hi in opcodes:
        if tag == "equal":
            parts.extend({"text": tok, "type": "equal"} for tok in old_tokens[old_lo:old_hi])
            continue
        # "replace" contributes to both groups below: removed tokens first,
        # then their replacements.
        if tag in ("replace", "delete"):
            parts.extend({"text": tok, "type": "delete"} for tok in old_tokens[old_lo:old_hi])
        if tag in ("replace", "insert"):
            parts.extend({"text": tok, "type": "add"} for tok in new_tokens[new_lo:new_hi])

    return WordDiff(parts)
|
|
257
|
+
|
|
258
|
+
|
|
259
|
+
def _tokenize(text: str) -> list[str]:
    """Split ``text`` into word tokens and single-character whitespace tokens.

    Each run of non-whitespace characters becomes one token and every
    whitespace character becomes its own token, so joining the result
    reproduces ``text``.
    """
    tokens: list[str] = []
    word_start: Optional[int] = None  # index where the current word began
    for index, char in enumerate(text):
        if char.isspace():
            if word_start is not None:
                tokens.append(text[word_start:index])
                word_start = None
            tokens.append(char)
        elif word_start is None:
            word_start = index
    if word_start is not None:
        tokens.append(text[word_start:])
    return tokens
|
|
274
|
+
|
|
275
|
+
|
|
276
|
+
def _pair_change_run(deletions: list, additions: list) -> None:
    """Attach a shared word diff to the k-th deletion and k-th addition of a run."""
    for del_line, add_line in zip(deletions, additions):
        word_diff = compute_word_diff(del_line.content, add_line.content)
        add_line.word_diff = word_diff
        del_line.word_diff = word_diff


def compute_word_diffs(file: DiffFile) -> None:
    """Compute word-level diffs for all applicable lines in a file.

    Deletions and additions are paired by position within each contiguous
    run of changed lines.  A context line ends the run, so lines from
    unrelated edits in the same hunk are never paired.  Unpaired lines keep
    ``word_diff = None``.

    Args:
        file: The diff file whose hunk lines are annotated in place.
    """
    for hunk in file.hunks:
        deletions: list[DiffLine] = []
        additions: list[DiffLine] = []

        for line in hunk.lines:
            if line.line_type == DiffLine.TYPE_DELETION:
                deletions.append(line)
            elif line.line_type == DiffLine.TYPE_ADDITION:
                additions.append(line)
            else:
                # Context line: the current change run is complete.
                _pair_change_run(deletions, additions)
                deletions, additions = [], []

        # Flush the run that reaches the end of the hunk.
        _pair_change_run(deletions, additions)
|