diffstory 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
diffstory/__init__.py ADDED
@@ -0,0 +1,3 @@
1
+ """DiffStory — Transform Git diffs into rich, interactive HTML reports."""
2
+
3
+ __version__ = "0.2.0"
diffstory/__main__.py ADDED
@@ -0,0 +1,5 @@
1
+ """Allow running diffstory with `python -m diffstory`."""
2
+ from diffstory.cli import main
3
+
4
+ if __name__ == "__main__":
5
+ main()
diffstory/cli.py ADDED
@@ -0,0 +1,402 @@
1
+ """CLI entry point for DiffStory — parse arguments, gather diffs, generate reports."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import argparse
6
+ import os
7
+ import sys
8
+ from pathlib import Path
9
+ from typing import Optional
10
+
11
+ from diffstory import __version__
12
+ from diffstory.diff_parser import parse_diff
13
+ from diffstory.git_utils import (
14
+ GitError,
15
+ check_git_repo,
16
+ get_diff,
17
+ get_diff_with_renames,
18
+ )
19
+ from diffstory.html_generator import generate_report
20
+
21
+
22
+ # ---------------------------------------------------------------------------
23
+ # Config file (.diffstory.toml) loader
24
+ # ---------------------------------------------------------------------------
25
DEFAULT_CONFIG_PATH = Path.home() / ".diffstory.toml"
LOCAL_CONFIG_PATH = Path.cwd() / ".diffstory.toml"


def _load_config() -> dict:
    """Load ``.diffstory.toml`` settings from the home and project directories.

    The user-wide file in the home directory is read first and the
    project-local file second, so a setting in the project file overrides
    the same setting in the home file.  A file that is missing, unreadable
    or not valid UTF-8 is skipped: a broken config must never prevent a run.

    Returns:
        A flat dict of settings that serve as defaults for CLI flags.
    """
    config: dict = {}

    # Later entries win in dict.update(), so the most specific file goes last.
    for cfg_path in (DEFAULT_CONFIG_PATH, LOCAL_CONFIG_PATH):
        try:
            text = cfg_path.read_text(encoding="utf-8")
            config.update(_parse_toml_like(text))
        except (OSError, ValueError):
            # OSError: missing/unreadable file; ValueError covers
            # UnicodeDecodeError.  Config loading is deliberately best-effort.
            continue
    return config
44
+
45
+
46
+ def _parse_toml_like(text: str) -> dict:
47
+ """Minimal TOML parser — enough for our trivial config schema."""
48
+ result: dict = {}
49
+ current_section: Optional[str] = None
50
+ for line in text.splitlines():
51
+ stripped = line.strip()
52
+ if not stripped or stripped.startswith("#"):
53
+ continue
54
+ if stripped.startswith("[") and stripped.endswith("]"):
55
+ current_section = stripped[1:-1].strip().lower()
56
+ continue
57
+ if "=" in stripped:
58
+ key, _, val = stripped.partition("=")
59
+ key = key.strip().lower()
60
+ val = val.strip().strip('"').strip("'")
61
+ # Lower-case boolean-ish strings
62
+ if val.lower() in ("true", "yes", "on"):
63
+ val = True
64
+ elif val.lower() in ("false", "no", "off"):
65
+ val = False
66
+ else:
67
+ try:
68
+ val = int(val)
69
+ except ValueError:
70
+ pass
71
+ full_key = f"{current_section}.{key}" if current_section else key
72
+ result[full_key] = val
73
+ return result
74
+
75
+
76
def build_parser() -> argparse.ArgumentParser:
    """Build the CLI argument parser.

    Returns:
        The configured ``argparse.ArgumentParser`` for the ``diffstory``
        command.
    """
    parser = argparse.ArgumentParser(
        prog="diffstory",
        description="Transform Git diffs into rich, interactive, self-contained HTML reports.",
        # RawDescriptionHelpFormatter prints the epilog verbatim, so these
        # must be real newlines; an escaped backslash-n would be shown
        # literally in the help output.
        epilog=(
            "Examples:\n"
            " diffstory # working tree diff\n"
            " diffstory --staged # staged changes\n"
            " diffstory HEAD~3 HEAD # commit comparison\n"
            " diffstory main feature # branch comparison\n"
            " diffstory -o report.html # custom output file\n"
            " diffstory --json # JSON export\n"
            " diffstory HEAD~3 HEAD src/ # restrict to path"
        ),
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )

    parser.add_argument(
        "--version",
        action="version",
        version=f"diffstory {__version__}",
    )

    parser.add_argument(
        "--staged",
        action="store_true",
        help="Show staged changes (equivalent to git diff --cached)",
    )

    parser.add_argument(
        "revisions",
        nargs="*",
        metavar="REVISION",
        help="Optional commit range: REVISION [REVISION] [-- path]",
    )

    parser.add_argument(
        "-o",
        "--output",
        default="diffstory-report.html",
        metavar="FILE",
        help="Output file path (default: diffstory-report.html)",
    )

    parser.add_argument(
        "--json",
        action="store_true",
        help="Export diff data as JSON",
    )

    parser.add_argument(
        "--md",
        action="store_true",
        help="Export diff summary as Markdown",
    )

    parser.add_argument(
        "--csv",
        action="store_true",
        help="Export diff stats as CSV",
    )

    parser.add_argument(
        "--diff",
        metavar="FILE",
        help="Generate report from a diff file directly (no git repository needed)",
    )

    # default=None (instead of False) lets the caller tell "flag not given"
    # apart from an explicit choice and fall back to the config file.
    parser.add_argument(
        "--verbose", "-v",
        action="store_true",
        default=None,
        help="Show git commands and timing information",
    )

    parser.add_argument(
        "--debug",
        action="store_true",
        default=None,
        help="Show detailed debug output including stack traces",
    )

    return parser
160
+
161
+
162
+ def _parse_revisions(args: argparse.Namespace) -> tuple[Optional[str], Optional[str], Optional[list[str]]]:
163
+ """Extract commit range and optional path restriction from positional args.
164
+
165
+ Supports:
166
+ [] -> working tree
167
+ [COMMIT_A, COMMIT_B] -> commit comparison
168
+ [COMMIT_A, COMMIT_B, --, path] -> restricted comparison
169
+ [COMMIT] -> comparison with HEAD
170
+ """
171
+ revisions = args.revisions
172
+ paths: Optional[list[str]] = None
173
+ commit_a: Optional[str] = None
174
+ commit_b: Optional[str] = None
175
+
176
+ if not revisions:
177
+ return None, None, None
178
+
179
+ # Check for path separator
180
+ if "--" in revisions:
181
+ sep_idx = revisions.index("--")
182
+ revisions = revisions[:sep_idx]
183
+ paths = revisions[sep_idx + 1:]
184
+
185
+ if len(revisions) == 1:
186
+ commit_a = revisions[0]
187
+ elif len(revisions) >= 2:
188
+ commit_a = revisions[0]
189
+ commit_b = revisions[1]
190
+
191
+ return commit_a, commit_b, paths
192
+
193
+
194
def generate_exports(
    files,
    output_path: str,
    json_export: bool = False,
    md_export: bool = False,
    csv_export: bool = False,
) -> None:
    """Write the requested non-HTML exports next to the HTML report.

    Each export keeps the report's directory and stem and only swaps the
    extension (``.json``, ``.md``, ``.csv``).

    Args:
        files: Parsed diff files to export.
        output_path: Path of the HTML report the exports are named after.
        json_export: Write the JSON export when true.
        md_export: Write the Markdown export when true.
        csv_export: Write the CSV export when true.
    """
    report = Path(output_path)

    def sibling(extension: str) -> Path:
        # Same directory and stem as the report, different extension.
        return report.with_name(report.stem + extension)

    if json_export:
        _export_json(files, sibling(".json"))
    if md_export:
        _export_markdown(files, sibling(".md"))
    if csv_export:
        _export_csv(files, sibling(".csv"))
211
+
212
+
213
+ def _export_json(files, output_path: Path) -> None:
214
+ """Export diff data as JSON."""
215
+ import json
216
+
217
+ data = []
218
+ for f in files:
219
+ file_data = {
220
+ "old_path": f.old_path,
221
+ "new_path": f.new_path,
222
+ "status": f.status,
223
+ "hunks": [],
224
+ }
225
+ for hunk in f.hunks:
226
+ hunk_data = {
227
+ "old_start": hunk.old_start,
228
+ "old_count": hunk.old_count,
229
+ "new_start": hunk.new_start,
230
+ "new_count": hunk.new_count,
231
+ "lines": [],
232
+ }
233
+ for line in hunk.lines:
234
+ hunk_data["lines"].append({
235
+ "type": line.line_type,
236
+ "content": line.content,
237
+ "old_lineno": line.old_lineno,
238
+ "new_lineno": line.new_lineno,
239
+ })
240
+ file_data["hunks"].append(hunk_data)
241
+ data.append(file_data)
242
+
243
+ output_path.write_text(json.dumps(data, indent=2), encoding="utf-8")
244
+ print(f" JSON: {output_path}")
245
+
246
+
247
+ def _export_markdown(files, output_path: Path) -> None:
248
+ """Export diff summary as Markdown."""
249
+ lines = ["# DiffStory Report\n"]
250
+ for f in files:
251
+ adds = sum(1 for h in f.hunks for l in h.lines if l.line_type == "addition")
252
+ dels = sum(1 for h in f.hunks for l in h.lines if l.line_type == "deletion")
253
+ status_icon = {"added": "+", "deleted": "-", "renamed": "→", "modified": "~"}.get(f.status, "~")
254
+ lines.append(f"## {status_icon} `{f.display_path}`")
255
+ lines.append(f"- **Status:** {f.status}")
256
+ lines.append(f"- **Additions:** {adds}")
257
+ lines.append(f"- **Deletions:** {dels}\n")
258
+ for hunk in f.hunks:
259
+ lines.append(f"### @@ {hunk.old_start},{hunk.old_count} {hunk.new_start},{hunk.new_count} @@")
260
+ if hunk.header:
261
+ lines.append(f"_{hunk.header}_\n")
262
+ for line in hunk.lines:
263
+ prefix = {"context": " ", "addition": "+", "deletion": "-"}[line.line_type]
264
+ lines.append(f" {prefix} {line.content}")
265
+ lines.append("")
266
+
267
+ output_path.write_text("\n".join(lines), encoding="utf-8")
268
+ print(f" Markdown: {output_path}")
269
+
270
+
271
+ def _export_csv(files, output_path: Path) -> None:
272
+ """Export diff stats as CSV."""
273
+ rows = ["file,status,additions,deletions"]
274
+ for f in files:
275
+ adds = sum(1 for h in f.hunks for l in h.lines if l.line_type == "addition")
276
+ dels = sum(1 for h in f.hunks for l in h.lines if l.line_type == "deletion")
277
+ rows.append(f"{f.display_path},{f.status},{adds},{dels}")
278
+
279
+ output_path.write_text("\n".join(rows) + "\n", encoding="utf-8")
280
+ print(f" CSV: {output_path}")
281
+
282
+
283
+ def _read_diff_from_file(path: str) -> str:
284
+ """Read diff content from a file."""
285
+ try:
286
+ return Path(path).read_text(encoding="utf-8")
287
+ except FileNotFoundError:
288
+ print(f"Error: Diff file not found: {path}", file=sys.stderr)
289
+ sys.exit(1)
290
+ except Exception as e:
291
+ print(f"Error reading diff file: {e}", file=sys.stderr)
292
+ sys.exit(1)
293
+
294
+
295
def _write_outputs(files, args: argparse.Namespace, debug: bool, **report_kwargs) -> None:
    """Write the requested exports and the HTML report, then print a summary.

    Args:
        files: Parsed diff files.
        args: Parsed CLI namespace (``output``, ``json``, ``md``, ``csv``).
        debug: Print the stack trace when report generation fails.
        **report_kwargs: Extra keyword arguments for ``generate_report``.

    Raises:
        SystemExit: With status 1 when the HTML report cannot be generated.
    """
    if args.json or args.md or args.csv:
        generate_exports(files, args.output, args.json, args.md, args.csv)

    # The HTML report is always generated, with or without exports.
    try:
        report_path = generate_report(files, output_path=args.output, **report_kwargs)
    except Exception as e:
        if debug:
            import traceback
            traceback.print_exc()
        print(f"Error generating report: {e}", file=sys.stderr)
        sys.exit(1)

    # A real newline: the blank line separates the summary from prior output.
    print(f"\n HTML: {report_path}")
    print(" Report generated successfully!")


def main() -> None:
    """Main entry point for the diffstory CLI.

    Parses the command line, obtains a diff either from ``--diff FILE`` or
    from the current Git repository, parses it, and writes the HTML report
    plus any requested exports.  Exits with status 1 on errors and status 0
    when there is nothing to report.
    """
    parser = build_parser()
    args = parser.parse_args()

    # Config file values are only defaults; an explicit flag always wins.
    config = _load_config()
    verbose = args.verbose if args.verbose is not None else config.get("cli.verbose", False)
    debug = args.debug if args.debug is not None else config.get("cli.debug", False)

    if debug:
        verbose = True  # debug implies verbose

    # Handle --diff flag (read diff from file, no git needed)
    if args.diff:
        if verbose:
            print(f" Reading diff file: {args.diff}")
        diff_text = _read_diff_from_file(args.diff)
        files = parse_diff(diff_text)
        if not files:
            print("No parseable diff files found.")
            sys.exit(0)
        _write_outputs(files, args, debug, repo_name="diff", verbose=verbose)
        return

    # Validate Git repository
    if not check_git_repo():
        print("Error: Not inside a Git repository.", file=sys.stderr)
        sys.exit(1)

    # Parse revisions
    commit_a, commit_b, paths = _parse_revisions(args)

    if verbose:
        rev_desc = "staged" if args.staged else "working tree"
        if commit_a and commit_b:
            rev_desc = f"{commit_a}..{commit_b}"
        elif commit_a:
            rev_desc = commit_a
        print(f" Diff: {rev_desc}")

    try:
        # Get diff
        diff_text = get_diff_with_renames(
            staged=args.staged,
            commit_a=commit_a,
            commit_b=commit_b,
            paths=paths,
        )
    except GitError as e:
        print(f"Error fetching diff: {e}", file=sys.stderr)
        sys.exit(1)

    if not diff_text.strip():
        print("No changes detected.")
        sys.exit(0)

    if verbose:
        print(f" Diff size: {len(diff_text)} bytes")

    # Parse diff
    files = parse_diff(diff_text)

    if not files:
        print("No parseable diff files found.")
        sys.exit(0)

    if verbose:
        print(f" Files changed: {len(files)}")

    _write_outputs(
        files,
        args,
        debug,
        staged=args.staged,
        commit_a=commit_a,
        commit_b=commit_b,
        verbose=verbose,
    )
diffstory/diff_parser.py ADDED
@@ -0,0 +1,298 @@
1
+ """Parse unified diff output into structured Python data."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import difflib
6
+ import re
7
+ from typing import Optional
8
+
9
+
10
# Image extensions that can be previewed inline
IMAGE_EXTENSIONS = frozenset({".png", ".jpg", ".jpeg", ".gif", ".svg", ".webp", ".bmp", ".ico"})


class DiffFile:
    """Represents a single file in a diff.

    Args:
        old_path: Path on the old side of the diff.
        new_path: Path on the new side of the diff (``/dev/null`` when the
            file was deleted).
        status: One of ``added``, ``deleted``, ``modified``, ``renamed``.
        old_mode: File mode before the change, if reported.
        new_mode: File mode after the change, if reported.
        similarity: Rename similarity percentage, if reported.
    """

    def __init__(
        self,
        old_path: str,
        new_path: str,
        status: str = "modified",
        old_mode: Optional[str] = None,
        new_mode: Optional[str] = None,
        similarity: Optional[int] = None,
    ):
        self.old_path = old_path
        self.new_path = new_path
        # A deleted file has no new path worth showing; fall back to the old one.
        self.display_path = new_path if new_path != "/dev/null" else old_path
        self.status = status  # added, deleted, modified, renamed
        self.old_mode = old_mode
        self.new_mode = new_mode
        self.similarity = similarity
        self.hunks: list[Hunk] = []
        self.is_binary_file = False  # set True when binary, no hunks

    @property
    def is_binary(self) -> bool:
        """Whether the file should be treated as binary.

        True when the explicit ``is_binary_file`` flag is set (this also
        covers newly added binary files), or - as a fallback heuristic -
        when the file has no hunks and was not added.
        """
        if self.is_binary_file:
            return True
        return not self.hunks and self.status != "added"

    @property
    def is_image(self) -> bool:
        """Check if the file path looks like a known image type."""
        _, dot, extension = self.display_path.rpartition(".")
        return bool(dot) and f".{extension.lower()}" in IMAGE_EXTENSIONS
45
+
46
+
47
class Hunk:
    """One ``@@ ... @@`` section of a file diff.

    Stores the old/new start line and line count taken from the hunk
    header, the optional text that follows the closing ``@@``, and the
    diff lines that belong to the hunk.
    """

    def __init__(
        self,
        old_start: int,
        old_count: int,
        new_start: int,
        new_count: int,
        header: str = "",
    ):
        # Range on the old side of the diff.
        self.old_start, self.old_count = old_start, old_count
        # Range on the new side of the diff.
        self.new_start, self.new_count = new_start, new_count
        self.header = header
        # Filled in by the parser as the hunk body is read.
        self.lines: list[DiffLine] = []
64
+
65
+
66
class DiffLine:
    """One line inside a hunk.

    ``line_type`` is one of the ``TYPE_*`` constants.  ``old_lineno`` and
    ``new_lineno`` default to ``None`` when the caller does not supply
    them.  ``word_diff`` starts out as ``None``.
    """

    TYPE_CONTEXT, TYPE_ADDITION, TYPE_DELETION = "context", "addition", "deletion"

    def __init__(
        self,
        line_type: str,
        content: str,
        old_lineno: Optional[int] = None,
        new_lineno: Optional[int] = None,
    ):
        self.line_type, self.content = line_type, content
        self.old_lineno, self.new_lineno = old_lineno, new_lineno
        # No word-level diff until one is attached.
        self.word_diff: Optional[WordDiff] = None
85
+
86
+
87
class WordDiff:
    """Word-level diff of a line pair, used by inline edit mode.

    ``parts`` is a list of dicts shaped like
    ``{"text": "...", "type": "equal|add|delete"}``.
    """

    def __init__(self, parts: list[dict]):
        # Stored as given; no copy is made.
        self.parts = parts
92
+
93
+
94
def parse_diff(diff_text: str) -> list[DiffFile]:
    """Parse a unified diff string into a list of DiffFile objects.

    Only ``diff --git`` style input is recognized: everything before the
    first ``diff --git`` line is ignored.  Per file, the extended header
    lines (rename, similarity, mode, binary markers) update the ``DiffFile``
    and every ``@@`` header starts a new ``Hunk`` whose lines are numbered
    from the header's start positions.

    Args:
        diff_text: Complete diff output.

    Returns:
        One ``DiffFile`` per ``diff --git`` header, in input order.
    """
    files: list[DiffFile] = []
    current_file: Optional[DiffFile] = None
    current_hunk: Optional[Hunk] = None

    old_lineno = 0
    new_lineno = 0

    for line in diff_text.splitlines():
        # Check for file headers
        if line.startswith("diff --git "):
            current_file = _parse_diff_header(line)
            files.append(current_file)
            current_hunk = None
            old_lineno = 0
            new_lineno = 0
            continue

        if current_file is None:
            continue

        # Hunk header
        if line.startswith("@@"):
            current_hunk = _parse_hunk_header(line)
            current_file.hunks.append(current_hunk)
            old_lineno = current_hunk.old_start
            new_lineno = current_hunk.new_start
            continue

        if current_hunk is not None:
            # Inside a hunk every line is content.  This is checked before
            # the file-header prefixes so that a removed line whose text
            # starts with "-- " (shown as "--- ...") or an added line that
            # starts with "++ " is not mistaken for a ---/+++ header.
            if line.startswith("\\"):
                # "\ No newline at end of file" is a marker, not a line of
                # the file; it must not consume a line number.
                continue
            if line.startswith("+"):
                current_hunk.lines.append(
                    DiffLine(DiffLine.TYPE_ADDITION, line[1:], new_lineno=new_lineno)
                )
                new_lineno += 1
            elif line.startswith("-"):
                current_hunk.lines.append(
                    DiffLine(DiffLine.TYPE_DELETION, line[1:], old_lineno=old_lineno)
                )
                old_lineno += 1
            else:
                # Context line (starts with space)
                content = line[1:] if len(line) > 1 else ""
                current_hunk.lines.append(
                    DiffLine(DiffLine.TYPE_CONTEXT, content, old_lineno, new_lineno)
                )
                old_lineno += 1
                new_lineno += 1
            continue

        # --- Extended header lines (only seen before the first hunk) ---

        # Check for rename/copy info
        if line.startswith("rename from "):
            current_file.old_path = line[12:]
            current_file.status = "renamed"
            continue
        if line.startswith("rename to "):
            current_file.new_path = line[10:]
            current_file.display_path = current_file.new_path
            current_file.status = "renamed"
            continue

        # Similarity index
        if line.startswith("similarity index "):
            try:
                current_file.similarity = int(line[17:].rstrip("%"))
            except ValueError:
                pass  # malformed percentage: leave similarity unset
            continue

        # Binary files: record the fact instead of relying on "no hunks".
        if (
            line.startswith("Binary files ")
            or line == "Binary files differ"
            or line.startswith("GIT binary patch")
        ):
            current_file.is_binary_file = True
            continue

        # New file mode
        if line.startswith("new file mode "):
            current_file.status = "added"
            current_file.new_mode = line[14:]
            continue

        # Deleted file mode
        if line.startswith("deleted file mode "):
            current_file.status = "deleted"
            current_file.old_mode = line[18:]
            continue

        # Old mode / new mode
        if line.startswith("old mode "):
            current_file.old_mode = line[9:]
            continue
        if line.startswith("new mode "):
            current_file.new_mode = line[9:]
            continue

        # Remaining header lines ("index ", "--- ", "+++ ") and anything
        # else before the first hunk carry nothing that is stored.

    return files
198
+
199
+
200
def _parse_diff_header(line: str) -> DiffFile:
    """Parse 'diff --git a/path b/path' header.

    When both sides name the same path the header is symmetric
    (``a/<path> b/<path>``), so it is split exactly in the middle; this
    stays correct even if the path itself contains `` b/``.  Otherwise the
    header is split at the first `` b/``.

    Args:
        line: The full header line, starting with ``diff --git ``.

    Returns:
        A ``DiffFile`` with the old and new paths (prefixes removed) and
        default status.
    """
    # Extract paths after 'diff --git '
    rest = line[11:]

    # Symmetric form: len("a/") + n + len(" b/") + n == len(rest)
    path_len, remainder = divmod(len(rest) - 5, 2)
    if rest.startswith("a/") and path_len >= 0 and remainder == 0:
        old_side = rest[2:2 + path_len]
        separator = rest[2 + path_len:5 + path_len]
        new_side = rest[5 + path_len:]
        if separator == " b/" and old_side == new_side:
            return DiffFile(old_side, new_side)

    parts = rest.split(" b/", 1)
    if len(parts) == 2:
        old_path = parts[0][2:] if parts[0].startswith("a/") else parts[0]
        new_path = parts[1]
    else:
        # No recognizable separator: use the whole remainder for both sides.
        old_path = rest
        new_path = rest
    return DiffFile(old_path, new_path)
212
+
213
+
214
HUNK_HEADER_RE = re.compile(r"^@@ -(\d+)(?:,(\d+))? \+(\d+)(?:,(\d+))? @@(.*)")


def _parse_hunk_header(line: str) -> Hunk:
    """Turn an ``@@ -old,count +new,count @@ text`` line into a Hunk.

    A count that is omitted from the header defaults to 1.  A line that
    does not match the expected shape yields an all-zero Hunk with an
    empty header.
    """
    found = HUNK_HEADER_RE.match(line)
    if found is None:
        return Hunk(0, 0, 0, 0)

    old_start, old_count, new_start, new_count, trailer = found.groups()
    return Hunk(
        int(old_start),
        int(old_count or 1),
        int(new_start),
        int(new_count or 1),
        trailer.strip(),
    )
228
+
229
+
230
def compute_word_diff(old_line: str, new_line: str) -> WordDiff:
    """Build the word-level diff between an old and a new line.

    Both lines are tokenized and compared with ``difflib.SequenceMatcher``.
    Every token becomes one part tagged ``equal``, ``delete`` or ``add``;
    within a changed region the removed tokens come before the inserted
    ones.
    """
    before = _tokenize(old_line)
    after = _tokenize(new_line)

    segments: list[dict] = []
    opcodes = difflib.SequenceMatcher(None, before, after).get_opcodes()

    for tag, old_lo, old_hi, new_lo, new_hi in opcodes:
        if tag == "equal":
            segments.extend({"text": token, "type": "equal"} for token in before[old_lo:old_hi])
            continue
        # replace / delete / insert: the old slice is empty for an insert
        # and the new slice is empty for a delete, so one code path covers
        # all three opcodes.
        segments.extend({"text": token, "type": "delete"} for token in before[old_lo:old_hi])
        segments.extend({"text": token, "type": "add"} for token in after[new_lo:new_hi])

    return WordDiff(segments)
257
+
258
+
259
+ def _tokenize(text: str) -> list[str]:
260
+ """Tokenize text into words, preserving whitespace."""
261
+ tokens: list[str] = []
262
+ current = ""
263
+ for char in text:
264
+ if char.isspace():
265
+ if current:
266
+ tokens.append(current)
267
+ current = ""
268
+ tokens.append(char)
269
+ else:
270
+ current += char
271
+ if current:
272
+ tokens.append(current)
273
+ return tokens
274
+
275
+
276
def _pair_word_diffs(deletions: list[DiffLine], additions: list[DiffLine]) -> None:
    """Attach a shared word diff to each deletion/addition pair, by position."""
    for del_line, add_line in zip(deletions, additions):
        word_diff = compute_word_diff(del_line.content, add_line.content)
        add_line.word_diff = word_diff
        del_line.word_diff = word_diff


def compute_word_diffs(file: DiffFile) -> None:
    """Compute word-level diffs for all applicable lines in a file.

    Lines are paired only inside one change run: a block of consecutive
    deletions followed directly by consecutive additions.  The n-th deletion
    of a run is matched with its n-th addition; surplus lines on either side
    keep ``word_diff = None``.  Pairing per run (rather than across the whole
    hunk) keeps unrelated edits that are separated by context lines from
    being compared with each other.

    Args:
        file: The parsed file whose ``DiffLine`` objects are updated in place.
    """
    for hunk in file.hunks:
        deletions: list[DiffLine] = []
        additions: list[DiffLine] = []

        for line in hunk.lines:
            if line.line_type == DiffLine.TYPE_DELETION:
                if additions:
                    # A deletion after additions starts a new change run.
                    _pair_word_diffs(deletions, additions)
                    deletions, additions = [], []
                deletions.append(line)
            elif line.line_type == DiffLine.TYPE_ADDITION:
                additions.append(line)
            else:
                # A context line closes the current run.
                _pair_word_diffs(deletions, additions)
                deletions, additions = [], []

        # Flush the run that reaches the end of the hunk.
        _pair_word_diffs(deletions, additions)