patch-fixer 0.3.4__tar.gz → 0.4.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {patch_fixer-0.3.4 → patch_fixer-0.4.0}/PKG-INFO +14 -2
- {patch_fixer-0.3.4 → patch_fixer-0.4.0}/README.md +13 -1
- {patch_fixer-0.3.4 → patch_fixer-0.4.0}/patch_fixer/patch_fixer.py +174 -44
- {patch_fixer-0.3.4 → patch_fixer-0.4.0}/patch_fixer.egg-info/PKG-INFO +14 -2
- {patch_fixer-0.3.4 → patch_fixer-0.4.0}/patch_fixer.egg-info/SOURCES.txt +1 -0
- {patch_fixer-0.3.4 → patch_fixer-0.4.0}/pyproject.toml +1 -1
- {patch_fixer-0.3.4 → patch_fixer-0.4.0}/tests/test_cli.py +96 -33
- {patch_fixer-0.3.4 → patch_fixer-0.4.0}/tests/test_fuzzy.py +7 -5
- patch_fixer-0.4.0/tests/test_hunk_finding.py +160 -0
- {patch_fixer-0.3.4 → patch_fixer-0.4.0}/tests/test_repos.py +1 -1
- {patch_fixer-0.3.4 → patch_fixer-0.4.0}/LICENSE +0 -0
- {patch_fixer-0.3.4 → patch_fixer-0.4.0}/patch_fixer/__init__.py +0 -0
- {patch_fixer-0.3.4 → patch_fixer-0.4.0}/patch_fixer/cli.py +0 -0
- {patch_fixer-0.3.4 → patch_fixer-0.4.0}/patch_fixer/split.py +0 -0
- {patch_fixer-0.3.4 → patch_fixer-0.4.0}/patch_fixer.egg-info/dependency_links.txt +0 -0
- {patch_fixer-0.3.4 → patch_fixer-0.4.0}/patch_fixer.egg-info/entry_points.txt +0 -0
- {patch_fixer-0.3.4 → patch_fixer-0.4.0}/patch_fixer.egg-info/requires.txt +0 -0
- {patch_fixer-0.3.4 → patch_fixer-0.4.0}/patch_fixer.egg-info/top_level.txt +0 -0
- {patch_fixer-0.3.4 → patch_fixer-0.4.0}/setup.cfg +0 -0
- {patch_fixer-0.3.4 → patch_fixer-0.4.0}/tests/test_norm.py +0 -0
- {patch_fixer-0.3.4 → patch_fixer-0.4.0}/tests/test_split.py +0 -0
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.4
|
2
2
|
Name: patch-fixer
|
3
|
-
Version: 0.
|
3
|
+
Version: 0.4.0
|
4
4
|
Summary: Fixes erroneous git apply patches to the best of its ability.
|
5
5
|
Maintainer-email: Alex Mueller <amueller474@gmail.com>
|
6
6
|
License-Expression: Apache-2.0
|
@@ -55,6 +55,11 @@ where:
|
|
55
55
|
- `broken.patch` is the malformed patch generated by the LLM
|
56
56
|
- `fixed.patch` is the output file containing the (hopefully) fixed patch
|
57
57
|
|
58
|
+
Options:
|
59
|
+
- `--fuzzy`: enable fuzzy string matching for better context matching (experimental)
|
60
|
+
- `--add-newline`: add final newlines when processing "No newline at end of file" markers
|
61
|
+
|
62
|
+
|
58
63
|
#### Splitting patches by file:
|
59
64
|
```bash
|
60
65
|
# Split with files specified on command line
|
@@ -81,9 +86,16 @@ original = "/path/to/original/state" # file or directory being patched
|
|
81
86
|
with open(patch_file, encoding="utf-8") as f:
|
82
87
|
patch_lines = f.readlines()
|
83
88
|
|
89
|
+
# basic usage
|
84
90
|
fixed_lines = fix_patch(patch_lines, original)
|
85
|
-
output_file = "/path/to/fixed.patch"
|
86
91
|
|
92
|
+
# with fuzzy matching enabled
|
93
|
+
fixed_lines = fix_patch(patch_lines, original, fuzzy=True)
|
94
|
+
|
95
|
+
# with final newline addition
|
96
|
+
fixed_lines = fix_patch(patch_lines, original, add_newline=True)
|
97
|
+
|
98
|
+
output_file = "/path/to/fixed.patch"
|
87
99
|
with open(output_file, 'w', encoding='utf-8') as f:
|
88
100
|
f.writelines(fixed_lines)
|
89
101
|
```
|
@@ -26,6 +26,11 @@ where:
|
|
26
26
|
- `broken.patch` is the malformed patch generated by the LLM
|
27
27
|
- `fixed.patch` is the output file containing the (hopefully) fixed patch
|
28
28
|
|
29
|
+
Options:
|
30
|
+
- `--fuzzy`: enable fuzzy string matching for better context matching (experimental)
|
31
|
+
- `--add-newline`: add final newlines when processing "No newline at end of file" markers
|
32
|
+
|
33
|
+
|
29
34
|
#### Splitting patches by file:
|
30
35
|
```bash
|
31
36
|
# Split with files specified on command line
|
@@ -52,9 +57,16 @@ original = "/path/to/original/state" # file or directory being patched
|
|
52
57
|
with open(patch_file, encoding="utf-8") as f:
|
53
58
|
patch_lines = f.readlines()
|
54
59
|
|
60
|
+
# basic usage
|
55
61
|
fixed_lines = fix_patch(patch_lines, original)
|
56
|
-
output_file = "/path/to/fixed.patch"
|
57
62
|
|
63
|
+
# with fuzzy matching enabled
|
64
|
+
fixed_lines = fix_patch(patch_lines, original, fuzzy=True)
|
65
|
+
|
66
|
+
# with final newline addition
|
67
|
+
fixed_lines = fix_patch(patch_lines, original, add_newline=True)
|
68
|
+
|
69
|
+
output_file = "/path/to/fixed.patch"
|
58
70
|
with open(output_file, 'w', encoding='utf-8') as f:
|
59
71
|
f.writelines(fixed_lines)
|
60
72
|
```
|
@@ -17,13 +17,49 @@ regexes = {
|
|
17
17
|
"RENAME_TO": re.compile(rf'rename to ({path_regex})'),
|
18
18
|
"FILE_HEADER_START": re.compile(rf'--- (a/{path_regex}|/dev/null)'),
|
19
19
|
"FILE_HEADER_END": re.compile(rf'\+\+\+ (b/{path_regex}|/dev/null)'),
|
20
|
-
"HUNK_HEADER": re.compile(r'^@@ -(\d+)
|
20
|
+
"HUNK_HEADER": re.compile(r'^@@ -(\d+)(?:,(\d+))? \+(\d+)(?:,(\d+))? @@(.*)$'),
|
21
21
|
"END_LINE": re.compile(r'\')
|
22
22
|
}
|
23
23
|
|
24
24
|
|
25
|
-
class
|
26
|
-
|
25
|
+
class HunkErrorBase(Exception):
|
26
|
+
def __init__(self, hunk_lines, file="(unknown file)"):
|
27
|
+
super().__init__()
|
28
|
+
self.hunk = "".join(hunk_lines)
|
29
|
+
self.file = file
|
30
|
+
|
31
|
+
def format_hunk_for_error(self):
|
32
|
+
"""Format hunk for error messages, showing only context and deletion lines."""
|
33
|
+
error_lines = []
|
34
|
+
for line in self.hunk.splitlines(keepends=True):
|
35
|
+
if line.startswith((' ', '-')): # context or deletion lines
|
36
|
+
error_lines.append(line)
|
37
|
+
# skip addition lines (+) as they shouldn't be in the original file
|
38
|
+
return ''.join(error_lines)
|
39
|
+
|
40
|
+
def add_file(self, file):
|
41
|
+
self.file = file
|
42
|
+
|
43
|
+
|
44
|
+
class MissingHunkError(HunkErrorBase):
|
45
|
+
def __str__(self):
|
46
|
+
return (f"Could not find hunk in {self.file}:"
|
47
|
+
f"\n================================"
|
48
|
+
f"\n{self.format_hunk_for_error()}"
|
49
|
+
f"================================")
|
50
|
+
|
51
|
+
|
52
|
+
class OutOfOrderHunk(HunkErrorBase):
|
53
|
+
def __init__(self, hunk_lines, prev_header, file="(unknown file)"):
|
54
|
+
super().__init__(hunk_lines, file)
|
55
|
+
self.prev_header = prev_header
|
56
|
+
|
57
|
+
def __str__(self):
|
58
|
+
return (f"Out of order hunk in {self.file}:"
|
59
|
+
f"\n==============================="
|
60
|
+
f"\n{self.format_hunk_for_error()}"
|
61
|
+
f"==============================="
|
62
|
+
f"\nOccurs before previous hunk with header {self.prev_header}")
|
27
63
|
|
28
64
|
|
29
65
|
class BadCarriageReturn(ValueError):
|
@@ -64,11 +100,12 @@ def normalize_line(line):
|
|
64
100
|
|
65
101
|
def fuzzy_line_similarity(line1, line2, threshold=0.8):
|
66
102
|
"""Calculate similarity between two lines using a simple ratio."""
|
67
|
-
if not line1 or not line2:
|
68
|
-
return 0.0
|
69
|
-
|
70
103
|
l1, l2 = line1.strip(), line2.strip()
|
71
104
|
|
105
|
+
# empty strings are identical
|
106
|
+
if len(l1) == 0 and len(l2) == 0:
|
107
|
+
return 1.0
|
108
|
+
|
72
109
|
if l1 == l2:
|
73
110
|
return 1.0
|
74
111
|
|
@@ -88,7 +125,10 @@ def find_hunk_start(context_lines, original_lines, fuzzy=False):
|
|
88
125
|
"""Search original_lines for context_lines and return start line index (0-based)."""
|
89
126
|
ctx = []
|
90
127
|
for line in context_lines:
|
91
|
-
if
|
128
|
+
if regexes["END_LINE"].match(line):
|
129
|
+
# "" is just git metadata; skip
|
130
|
+
continue
|
131
|
+
elif line.startswith(" "):
|
92
132
|
ctx.append(line.lstrip(" "))
|
93
133
|
elif line.startswith("-"):
|
94
134
|
# can't use lstrip; we want to keep other dashes in the line
|
@@ -105,6 +145,19 @@ def find_hunk_start(context_lines, original_lines, fuzzy=False):
|
|
105
145
|
if all(equal_lines):
|
106
146
|
return i
|
107
147
|
|
148
|
+
# try with more flexible whitespace matching
|
149
|
+
for i in range(len(original_lines) - len(ctx) + 1):
|
150
|
+
equal_lines = []
|
151
|
+
for j in range(len(ctx)):
|
152
|
+
orig_line = original_lines[i + j].strip()
|
153
|
+
ctx_line = ctx[j].strip()
|
154
|
+
# normalize whitespace: convert multiple spaces/tabs to single space
|
155
|
+
orig_normalized = ' '.join(orig_line.split())
|
156
|
+
ctx_normalized = ' '.join(ctx_line.split())
|
157
|
+
equal_lines.append(orig_normalized == ctx_normalized)
|
158
|
+
if all(equal_lines):
|
159
|
+
return i
|
160
|
+
|
108
161
|
# if fuzzy matching is enabled and exact match failed, try fuzzy match
|
109
162
|
if fuzzy:
|
110
163
|
best_match_score = 0.0
|
@@ -124,7 +177,7 @@ def find_hunk_start(context_lines, original_lines, fuzzy=False):
|
|
124
177
|
if best_match_score > 0.6:
|
125
178
|
return best_match_pos
|
126
179
|
|
127
|
-
|
180
|
+
raise MissingHunkError(context_lines)
|
128
181
|
|
129
182
|
|
130
183
|
def match_line(line):
|
@@ -156,24 +209,76 @@ def reconstruct_file_header(diff_line, header_type):
|
|
156
209
|
raise ValueError(f"Unsupported header type: {header_type}")
|
157
210
|
|
158
211
|
|
159
|
-
def
|
212
|
+
def find_all_hunk_starts(hunk_lines, search_lines, fuzzy=False):
|
213
|
+
"""Return all line indices in search_lines where this hunk matches."""
|
214
|
+
matches = []
|
215
|
+
start = 0
|
216
|
+
while True:
|
217
|
+
try:
|
218
|
+
idx = find_hunk_start(hunk_lines, search_lines[start:], fuzzy=fuzzy)
|
219
|
+
matches.append(start + idx)
|
220
|
+
start += idx + 1
|
221
|
+
except MissingHunkError:
|
222
|
+
break
|
223
|
+
return matches
|
224
|
+
|
225
|
+
|
226
|
+
def capture_hunk(current_hunk, original_lines, offset, last_hunk, old_header, fuzzy=False):
|
227
|
+
"""
|
228
|
+
Try to locate the hunk's true position in the original file.
|
229
|
+
If multiple possible matches exist, pick the one closest to the expected
|
230
|
+
(possibly corrupted) line number derived from the old hunk header.
|
231
|
+
"""
|
232
|
+
# extract needed info from old header match groups
|
233
|
+
expected_old_start = int(old_header[0]) if old_header else 0
|
234
|
+
try:
|
235
|
+
hunk_context = old_header[4]
|
236
|
+
except IndexError:
|
237
|
+
hunk_context = ""
|
238
|
+
|
160
239
|
# compute line counts
|
161
240
|
old_count = sum(1 for l in current_hunk if l.startswith((' ', '-')))
|
162
241
|
new_count = sum(1 for l in current_hunk if l.startswith((' ', '+')))
|
163
242
|
|
164
243
|
if old_count > 0:
|
165
|
-
|
166
|
-
|
167
|
-
|
168
|
-
#
|
169
|
-
|
170
|
-
|
244
|
+
search_index = last_hunk
|
245
|
+
search_lines = original_lines[search_index:]
|
246
|
+
|
247
|
+
# gather *all* possible matches
|
248
|
+
matches = find_all_hunk_starts(current_hunk, search_lines, fuzzy=fuzzy)
|
249
|
+
if matches:
|
250
|
+
# rebase to file line numbers (1-indexed later)
|
251
|
+
candidate_positions = [m + search_index for m in matches]
|
252
|
+
|
253
|
+
if expected_old_start:
|
254
|
+
# choose the one closest to the expected position
|
255
|
+
old_start = min(
|
256
|
+
candidate_positions,
|
257
|
+
key=lambda pos: abs(pos + 1 - expected_old_start),
|
258
|
+
) + 1 # convert to 1-indexed
|
259
|
+
else:
|
260
|
+
# pick first match if no expected line info
|
261
|
+
old_start = candidate_positions[0] + 1
|
171
262
|
else:
|
172
|
-
|
173
|
-
|
174
|
-
|
263
|
+
# try from start of file as fallback
|
264
|
+
matches = find_all_hunk_starts(current_hunk, original_lines, fuzzy=fuzzy)
|
265
|
+
if not matches:
|
266
|
+
raise MissingHunkError(current_hunk)
|
267
|
+
if expected_old_start:
|
268
|
+
old_start = (
|
269
|
+
min(matches, key=lambda pos: abs(pos + 1 - expected_old_start)) + 1
|
270
|
+
)
|
175
271
|
else:
|
176
|
-
|
272
|
+
old_start = matches[0] + 1
|
273
|
+
|
274
|
+
if old_start < last_hunk + 1:
|
275
|
+
raise OutOfOrderHunk(current_hunk, original_lines[last_hunk])
|
276
|
+
|
277
|
+
if new_count == 0:
|
278
|
+
# complete deletion of remaining content
|
279
|
+
new_start = 0
|
280
|
+
else:
|
281
|
+
new_start = old_start + offset
|
177
282
|
else:
|
178
283
|
# old count of zero can only mean file creation, since adding lines to
|
179
284
|
# an existing file requires surrounding context lines without a +
|
@@ -182,14 +287,36 @@ def capture_hunk(current_hunk, original_lines, offset, last_hunk, hunk_context,
|
|
182
287
|
|
183
288
|
offset += (new_count - old_count)
|
184
289
|
|
185
|
-
last_hunk
|
290
|
+
last_hunk += (old_start - last_hunk)
|
291
|
+
|
292
|
+
# use condensed header if it's only one line
|
293
|
+
old_part = f"{old_start},{old_count}" if old_count != 1 else f"{old_start}"
|
294
|
+
new_part = f"{new_start},{new_count}" if new_count != 1 else f"{new_start}"
|
186
295
|
|
187
|
-
|
188
|
-
fixed_header = f"@@ -{old_start},{old_count} +{new_start},{new_count} @@{hunk_context}\n"
|
296
|
+
fixed_header = f"@@ -{old_part} +{new_part} @@{hunk_context}\n"
|
189
297
|
|
190
298
|
return fixed_header, offset, last_hunk
|
191
299
|
|
192
300
|
|
301
|
+
def read_file_with_fallback_encoding(file_path):
|
302
|
+
"""Read file with UTF-8, falling back to other encodings if needed."""
|
303
|
+
encodings = ['utf-8', 'latin-1', 'cp1252', 'iso-8859-1']
|
304
|
+
|
305
|
+
for encoding in encodings:
|
306
|
+
try:
|
307
|
+
with open(file_path, 'r', encoding=encoding) as f:
|
308
|
+
return f.readlines()
|
309
|
+
except UnicodeDecodeError:
|
310
|
+
continue
|
311
|
+
|
312
|
+
# If all encodings fail, read as binary and replace problematic characters
|
313
|
+
with open(file_path, 'rb') as f:
|
314
|
+
content = f.read()
|
315
|
+
# Decode with UTF-8, replacing errors
|
316
|
+
text_content = content.decode('utf-8', errors='replace')
|
317
|
+
return text_content.splitlines(keepends=True)
|
318
|
+
|
319
|
+
|
193
320
|
def regenerate_index(old_path, new_path, cur_dir):
|
194
321
|
repo = Repo(cur_dir)
|
195
322
|
|
@@ -238,7 +365,7 @@ def fix_patch(patch_lines, original, remove_binary=False, fuzzy=False, add_newli
|
|
238
365
|
similarity_index = None
|
239
366
|
missing_index = False
|
240
367
|
binary_file = False
|
241
|
-
|
368
|
+
current_hunk_header = ()
|
242
369
|
original_lines = []
|
243
370
|
file_loaded = False
|
244
371
|
|
@@ -253,10 +380,10 @@ def fix_patch(patch_lines, original, remove_binary=False, fuzzy=False, add_newli
|
|
253
380
|
fixed_header,
|
254
381
|
offset,
|
255
382
|
last_hunk
|
256
|
-
) = capture_hunk(current_hunk, original_lines, offset, last_hunk,
|
257
|
-
except MissingHunkError:
|
258
|
-
|
259
|
-
|
383
|
+
) = capture_hunk(current_hunk, original_lines, offset, last_hunk, current_hunk_header, fuzzy=fuzzy)
|
384
|
+
except (MissingHunkError, OutOfOrderHunk) as e:
|
385
|
+
e.add_file(current_file)
|
386
|
+
raise e
|
260
387
|
fixed_lines.append(fixed_header)
|
261
388
|
fixed_lines.extend(current_hunk)
|
262
389
|
current_hunk = []
|
@@ -322,8 +449,8 @@ def fix_patch(patch_lines, original, remove_binary=False, fuzzy=False, add_newli
|
|
322
449
|
if not current_path.is_file():
|
323
450
|
raise IsADirectoryError(f"Rename from header points to a directory, not a file: {current_file}")
|
324
451
|
if dir_mode or current_path == original_path:
|
325
|
-
|
326
|
-
|
452
|
+
file_lines = read_file_with_fallback_encoding(current_path)
|
453
|
+
original_lines = [l.rstrip('\n') for l in file_lines]
|
327
454
|
fixed_lines.append(normalize_line(line))
|
328
455
|
file_loaded = True
|
329
456
|
else:
|
@@ -382,8 +509,8 @@ def fix_patch(patch_lines, original, remove_binary=False, fuzzy=False, add_newli
|
|
382
509
|
raise IsADirectoryError(f"File header start points to a directory, not a file: {current_file}")
|
383
510
|
if not file_loaded:
|
384
511
|
if dir_mode or Path(current_file) == Path(original):
|
385
|
-
|
386
|
-
|
512
|
+
file_lines = read_file_with_fallback_encoding(current_path)
|
513
|
+
original_lines = [l.rstrip('\n') for l in file_lines]
|
387
514
|
file_loaded = True
|
388
515
|
else:
|
389
516
|
raise FileNotFoundError(f"Filename {current_file} in header does not match argument {original}")
|
@@ -471,7 +598,7 @@ def fix_patch(patch_lines, original, remove_binary=False, fuzzy=False, add_newli
|
|
471
598
|
# we can't fix the hunk header before we've captured a hunk
|
472
599
|
if first_hunk:
|
473
600
|
first_hunk = False
|
474
|
-
|
601
|
+
current_hunk_header = match_groups
|
475
602
|
continue
|
476
603
|
|
477
604
|
try:
|
@@ -479,20 +606,20 @@ def fix_patch(patch_lines, original, remove_binary=False, fuzzy=False, add_newli
|
|
479
606
|
fixed_header,
|
480
607
|
offset,
|
481
608
|
last_hunk
|
482
|
-
) = capture_hunk(current_hunk, original_lines, offset, last_hunk,
|
483
|
-
except MissingHunkError:
|
484
|
-
|
485
|
-
|
609
|
+
) = capture_hunk(current_hunk, original_lines, offset, last_hunk, current_hunk_header, fuzzy=fuzzy)
|
610
|
+
except (MissingHunkError, OutOfOrderHunk) as e:
|
611
|
+
e.add_file(current_file)
|
612
|
+
raise e
|
486
613
|
fixed_lines.append(fixed_header)
|
487
614
|
fixed_lines.extend(current_hunk)
|
488
615
|
current_hunk = []
|
489
|
-
|
616
|
+
current_hunk_header = match_groups
|
490
617
|
case "END_LINE":
|
491
618
|
# if user requested, add a newline at end of file when this marker is present
|
492
619
|
if add_newline:
|
493
620
|
fixed_lines.append("\n")
|
494
621
|
else:
|
495
|
-
|
622
|
+
current_hunk.append(normalize_line(line))
|
496
623
|
case _:
|
497
624
|
# TODO: fix fuzzy string matching to be less granular
|
498
625
|
# this is a normal line, add to current hunk
|
@@ -504,15 +631,18 @@ def fix_patch(patch_lines, original, remove_binary=False, fuzzy=False, add_newli
|
|
504
631
|
fixed_header,
|
505
632
|
offset,
|
506
633
|
last_hunk
|
507
|
-
) = capture_hunk(current_hunk, original_lines, offset, last_hunk,
|
508
|
-
except MissingHunkError:
|
509
|
-
|
510
|
-
|
634
|
+
) = capture_hunk(current_hunk, original_lines, offset, last_hunk, current_hunk_header, fuzzy=fuzzy)
|
635
|
+
except (MissingHunkError, OutOfOrderHunk) as e:
|
636
|
+
e.add_file(current_file)
|
637
|
+
raise e
|
511
638
|
fixed_lines.append(fixed_header)
|
512
639
|
fixed_lines.extend(current_hunk)
|
513
640
|
|
514
|
-
# if original file didn't end with a newline, strip out the newline here
|
515
|
-
|
641
|
+
# if original file didn't end with a newline, strip out the newline here,
|
642
|
+
# unless user explicitly requested to add final newline
|
643
|
+
if (not add_newline and
|
644
|
+
((original_lines and not original_lines[-1].endswith("\n")) or
|
645
|
+
(fixed_lines and len(original_lines) == 0))):
|
516
646
|
fixed_lines[-1] = fixed_lines[-1].rstrip("\n")
|
517
647
|
|
518
648
|
return fixed_lines
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.4
|
2
2
|
Name: patch-fixer
|
3
|
-
Version: 0.
|
3
|
+
Version: 0.4.0
|
4
4
|
Summary: Fixes erroneous git apply patches to the best of its ability.
|
5
5
|
Maintainer-email: Alex Mueller <amueller474@gmail.com>
|
6
6
|
License-Expression: Apache-2.0
|
@@ -55,6 +55,11 @@ where:
|
|
55
55
|
- `broken.patch` is the malformed patch generated by the LLM
|
56
56
|
- `fixed.patch` is the output file containing the (hopefully) fixed patch
|
57
57
|
|
58
|
+
Options:
|
59
|
+
- `--fuzzy`: enable fuzzy string matching for better context matching (experimental)
|
60
|
+
- `--add-newline`: add final newlines when processing "No newline at end of file" markers
|
61
|
+
|
62
|
+
|
58
63
|
#### Splitting patches by file:
|
59
64
|
```bash
|
60
65
|
# Split with files specified on command line
|
@@ -81,9 +86,16 @@ original = "/path/to/original/state" # file or directory being patched
|
|
81
86
|
with open(patch_file, encoding="utf-8") as f:
|
82
87
|
patch_lines = f.readlines()
|
83
88
|
|
89
|
+
# basic usage
|
84
90
|
fixed_lines = fix_patch(patch_lines, original)
|
85
|
-
output_file = "/path/to/fixed.patch"
|
86
91
|
|
92
|
+
# with fuzzy matching enabled
|
93
|
+
fixed_lines = fix_patch(patch_lines, original, fuzzy=True)
|
94
|
+
|
95
|
+
# with final newline addition
|
96
|
+
fixed_lines = fix_patch(patch_lines, original, add_newline=True)
|
97
|
+
|
98
|
+
output_file = "/path/to/fixed.patch"
|
87
99
|
with open(output_file, 'w', encoding='utf-8') as f:
|
88
100
|
f.writelines(fixed_lines)
|
89
101
|
```
|
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
|
|
4
4
|
|
5
5
|
[project]
|
6
6
|
name = "patch-fixer"
|
7
|
-
version = "0.
|
7
|
+
version = "0.4.0"
|
8
8
|
description = "Fixes erroneous git apply patches to the best of its ability."
|
9
9
|
maintainers = [
|
10
10
|
{name = "Alex Mueller", email="amueller474@gmail.com"},
|
@@ -21,39 +21,39 @@ class TestCLI:
|
|
21
21
|
assert 'usage: patch-fixer' in captured.out
|
22
22
|
assert 'Available commands' in captured.out
|
23
23
|
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
24
|
+
def test_fix_command(self):
|
25
|
+
"""Test the fix command in directory mode."""
|
26
|
+
with tempfile.TemporaryDirectory() as tmpdir:
|
27
|
+
# create test files
|
28
|
+
original_file = os.path.join(tmpdir, 'original.txt')
|
29
|
+
with open(original_file, 'w') as f:
|
30
|
+
f.write("line1\nline2\nline3\n")
|
31
|
+
|
32
|
+
broken_patch = os.path.join(tmpdir, 'broken.patch')
|
33
|
+
with open(broken_patch, 'w') as f:
|
34
|
+
f.write("""diff --git a/original.txt b/original.txt
|
35
|
+
--- a/original.txt
|
36
|
+
+++ b/original.txt
|
37
|
+
@@ -1,3 +1,3 @@
|
38
|
+
line1
|
39
|
+
-line2
|
40
|
+
+modified line2
|
41
|
+
line3
|
42
|
+
""")
|
43
|
+
|
44
|
+
output_patch = os.path.join(tmpdir, 'fixed.patch')
|
45
|
+
|
46
|
+
# use directory mode to work around bug in file mode
|
47
|
+
with patch('sys.argv', ['patch-fixer', 'fix', tmpdir, broken_patch, output_patch]):
|
48
|
+
result = main()
|
49
|
+
|
50
|
+
assert result == 0
|
51
|
+
assert os.path.exists(output_patch)
|
52
|
+
|
53
|
+
with open(output_patch) as f:
|
54
|
+
content = f.read()
|
55
|
+
assert 'diff --git' in content
|
56
|
+
assert 'modified line2' in content
|
57
57
|
|
58
58
|
def test_split_command_with_files(self):
|
59
59
|
"""Test the split command with files specified on command line."""
|
@@ -140,6 +140,69 @@ diff --git a/file2.txt b/file2.txt
|
|
140
140
|
content = f.read()
|
141
141
|
assert 'file2.txt' in content
|
142
142
|
|
143
|
+
def test_fuzzy_match_option(self):
|
144
|
+
"""Test the --fuzzy-match option."""
|
145
|
+
with tempfile.TemporaryDirectory() as tmpdir:
|
146
|
+
# create test files
|
147
|
+
original_file = os.path.join(tmpdir, 'original.txt')
|
148
|
+
with open(original_file, 'w') as f:
|
149
|
+
f.write("line one\nline two\nline three\n")
|
150
|
+
|
151
|
+
broken_patch = os.path.join(tmpdir, 'broken.patch')
|
152
|
+
with open(broken_patch, 'w') as f:
|
153
|
+
f.write("""diff --git a/original.txt b/original.txt
|
154
|
+
--- a/original.txt
|
155
|
+
+++ b/original.txt
|
156
|
+
@@ -1,3 +1,3 @@
|
157
|
+
line 1
|
158
|
+
-line 2
|
159
|
+
+modified line 2
|
160
|
+
line 3
|
161
|
+
""")
|
162
|
+
|
163
|
+
output_patch = os.path.join(tmpdir, 'fixed.patch')
|
164
|
+
|
165
|
+
# test with fuzzy matching enabled
|
166
|
+
with patch('sys.argv', ['patch-fixer', 'fix', '--fuzzy', tmpdir, broken_patch, output_patch]):
|
167
|
+
result = main()
|
168
|
+
|
169
|
+
assert result == 0
|
170
|
+
assert os.path.exists(output_patch)
|
171
|
+
|
172
|
+
def test_add_newline_option(self):
|
173
|
+
"""Test the --add-newline option."""
|
174
|
+
with tempfile.TemporaryDirectory() as tmpdir:
|
175
|
+
# create test files
|
176
|
+
original_file = os.path.join(tmpdir, 'original.txt')
|
177
|
+
with open(original_file, 'w') as f:
|
178
|
+
f.write("line1\nline2") # no final newline
|
179
|
+
|
180
|
+
broken_patch = os.path.join(tmpdir, 'broken.patch')
|
181
|
+
with open(broken_patch, 'w') as f:
|
182
|
+
f.write("""diff --git a/original.txt b/original.txt
|
183
|
+
--- a/original.txt
|
184
|
+
+++ b/original.txt
|
185
|
+
@@ -1,2 +1,2 @@
|
186
|
+
-line1
|
187
|
+
+modified line1
|
188
|
+
line2
|
189
|
+
\
|
190
|
+
""")
|
191
|
+
|
192
|
+
output_patch = os.path.join(tmpdir, 'fixed.patch')
|
193
|
+
|
194
|
+
# test with add newline enabled
|
195
|
+
with patch('sys.argv', ['patch-fixer', 'fix', '--add-newline', tmpdir, broken_patch, output_patch]):
|
196
|
+
result = main()
|
197
|
+
|
198
|
+
assert result == 0
|
199
|
+
assert os.path.exists(output_patch)
|
200
|
+
|
201
|
+
with open(output_patch, 'r') as f:
|
202
|
+
content = f.read()
|
203
|
+
# should have newline instead of the marker
|
204
|
+
assert content.endswith("\n")
|
205
|
+
|
143
206
|
def test_error_handling(self, capsys):
|
144
207
|
"""Test error handling in CLI."""
|
145
208
|
with patch('sys.argv', ['patch-fixer', 'fix', 'nonexistent', 'nonexistent', 'out']):
|
@@ -1,7 +1,7 @@
|
|
1
1
|
#!/usr/bin/env python3
|
2
2
|
|
3
3
|
import pytest
|
4
|
-
from patch_fixer.patch_fixer import fuzzy_line_similarity, find_hunk_start
|
4
|
+
from patch_fixer.patch_fixer import fuzzy_line_similarity, find_hunk_start, MissingHunkError
|
5
5
|
|
6
6
|
|
7
7
|
class TestFuzzyMatching:
|
@@ -63,8 +63,8 @@ class TestFuzzyMatching:
|
|
63
63
|
]
|
64
64
|
|
65
65
|
# exact match should fail
|
66
|
-
|
67
|
-
|
66
|
+
with pytest.raises(MissingHunkError):
|
67
|
+
find_hunk_start(context_lines, original_lines, fuzzy=False)
|
68
68
|
|
69
69
|
# fuzzy match should succeed
|
70
70
|
result_fuzzy = find_hunk_start(context_lines, original_lines, fuzzy=True)
|
@@ -107,6 +107,8 @@ class TestFuzzyMatching:
|
|
107
107
|
" line 2\n" # very different from original
|
108
108
|
]
|
109
109
|
|
110
|
-
#
|
110
|
+
# the fuzzy match may find a match at lines 2-3 ("line 3", "line 4")
|
111
|
+
# because "line" appears in the context. This is actually reasonable behavior.
|
111
112
|
result = find_hunk_start(context_lines, original_lines, fuzzy=True)
|
112
|
-
|
113
|
+
# either no match (0) or match at line 2 where "line 3", "line 4" are found
|
114
|
+
assert result in [0, 2]
|
@@ -0,0 +1,160 @@
|
|
1
|
+
#!/usr/bin/env python3
|
2
|
+
|
3
|
+
import pytest
|
4
|
+
from patch_fixer.patch_fixer import find_hunk_start, capture_hunk, MissingHunkError
|
5
|
+
|
6
|
+
|
7
|
+
class TestImprovedHunkFinding:
|
8
|
+
"""Test improved hunk finding functionality."""
|
9
|
+
|
10
|
+
def test_format_hunk_for_error(self):
|
11
|
+
"""Test that format_hunk_for_error only shows context and deletion lines."""
|
12
|
+
hunk_lines = [
|
13
|
+
" \tcontext line 1\n",
|
14
|
+
"-\tdeleted line\n",
|
15
|
+
"+\tadded line 1\n",
|
16
|
+
"+\tadded line 2\n",
|
17
|
+
" \tcontext line 2\n"
|
18
|
+
]
|
19
|
+
|
20
|
+
error = MissingHunkError(hunk_lines)
|
21
|
+
result = error.format_hunk_for_error()
|
22
|
+
expected = " \tcontext line 1\n-\tdeleted line\n \tcontext line 2\n"
|
23
|
+
assert result == expected
|
24
|
+
|
25
|
+
def test_whitespace_tolerant_matching(self):
|
26
|
+
"""Test that hunk finding tolerates whitespace differences."""
|
27
|
+
original_lines = [
|
28
|
+
"function test() {\n", # multiple spaces
|
29
|
+
"\t\tvar x = 1;\n", # mixed tabs and spaces
|
30
|
+
"\t}\n"
|
31
|
+
]
|
32
|
+
|
33
|
+
context_lines = [
|
34
|
+
" function test() {\n", # normalized spaces
|
35
|
+
" \tvar x = 1;\n", # different whitespace
|
36
|
+
" }\n"
|
37
|
+
]
|
38
|
+
|
39
|
+
result = find_hunk_start(context_lines, original_lines, fuzzy=False)
|
40
|
+
assert result == 0 # should find match at beginning
|
41
|
+
|
42
|
+
def test_exact_match_prioritized(self):
|
43
|
+
"""Test that exact matches are found before whitespace-tolerant ones."""
|
44
|
+
original_lines = [
|
45
|
+
"exact match\n",
|
46
|
+
"function test() {\n", # whitespace different
|
47
|
+
"exact match\n"
|
48
|
+
]
|
49
|
+
|
50
|
+
context_lines = [
|
51
|
+
" exact match\n"
|
52
|
+
]
|
53
|
+
|
54
|
+
# should find first exact match, not the whitespace-tolerant one
|
55
|
+
result = find_hunk_start(context_lines, original_lines, fuzzy=False)
|
56
|
+
assert result == 0
|
57
|
+
|
58
|
+
def test_hunk_not_found_raises_error(self):
|
59
|
+
"""Test that missing hunks raise ValueError instead of returning 0."""
|
60
|
+
original_lines = [
|
61
|
+
"completely different\n",
|
62
|
+
"content here\n"
|
63
|
+
]
|
64
|
+
|
65
|
+
context_lines = [
|
66
|
+
" nonexistent line\n"
|
67
|
+
]
|
68
|
+
|
69
|
+
with pytest.raises(MissingHunkError):
|
70
|
+
find_hunk_start(context_lines, original_lines, fuzzy=False)
|
71
|
+
|
72
|
+
def test_capture_hunk_handles_missing_hunk(self):
|
73
|
+
"""Test that capture_hunk properly handles missing hunks."""
|
74
|
+
original_lines = [
|
75
|
+
"existing line\n"
|
76
|
+
]
|
77
|
+
|
78
|
+
# hunk that won't be found
|
79
|
+
hunk_lines = [
|
80
|
+
" nonexistent context\n",
|
81
|
+
"+new line\n"
|
82
|
+
]
|
83
|
+
|
84
|
+
with pytest.raises(MissingHunkError):
|
85
|
+
capture_hunk(hunk_lines, original_lines, 0, 0, "", False)
|
86
|
+
|
87
|
+
def test_addition_only_hunk(self):
|
88
|
+
"""Test that addition-only hunks are handled correctly."""
|
89
|
+
original_lines = [
|
90
|
+
"line 1\n",
|
91
|
+
"line 2\n"
|
92
|
+
]
|
93
|
+
|
94
|
+
# only additions, no context
|
95
|
+
hunk_lines = [
|
96
|
+
"+new line 1\n",
|
97
|
+
"+new line 2\n"
|
98
|
+
]
|
99
|
+
|
100
|
+
# should handle addition-only hunks without searching for context
|
101
|
+
header, offset, last_hunk = capture_hunk(hunk_lines, original_lines, 0, 0, "", False)
|
102
|
+
assert header == "@@ -0,0 +1,2 @@\n"
|
103
|
+
|
104
|
+
def test_fuzzy_fallback_when_exact_fails(self):
|
105
|
+
"""Test that fuzzy matching works when exact matching fails."""
|
106
|
+
original_lines = [
|
107
|
+
"line one\n", # different words
|
108
|
+
"line two\n",
|
109
|
+
"line three\n"
|
110
|
+
]
|
111
|
+
|
112
|
+
context_lines = [
|
113
|
+
" line 1\n", # similar but different
|
114
|
+
" line 2\n"
|
115
|
+
]
|
116
|
+
|
117
|
+
# exact should fail
|
118
|
+
with pytest.raises(MissingHunkError):
|
119
|
+
find_hunk_start(context_lines, original_lines, fuzzy=False)
|
120
|
+
|
121
|
+
# fuzzy should succeed
|
122
|
+
result = find_hunk_start(context_lines, original_lines, fuzzy=True)
|
123
|
+
assert result == 0 # should find fuzzy match
|
124
|
+
|
125
|
+
def test_deletion_lines_in_context(self):
|
126
|
+
"""Test that deletion lines are properly used for context matching."""
|
127
|
+
original_lines = [
|
128
|
+
"keep this\n",
|
129
|
+
"delete this\n",
|
130
|
+
"keep this too\n"
|
131
|
+
]
|
132
|
+
|
133
|
+
context_lines = [
|
134
|
+
" keep this\n",
|
135
|
+
"-delete this\n", # deletion line should match original
|
136
|
+
" keep this too\n"
|
137
|
+
]
|
138
|
+
|
139
|
+
result = find_hunk_start(context_lines, original_lines, fuzzy=False)
|
140
|
+
assert result == 0
|
141
|
+
|
142
|
+
def test_mixed_whitespace_types(self):
|
143
|
+
"""Test handling of mixed tabs and spaces."""
|
144
|
+
original_lines = [
|
145
|
+
"\t\tfunction() {\n", # tabs
|
146
|
+
" var x = 1;\n", # spaces
|
147
|
+
"\t return x;\n", # mixed
|
148
|
+
"\t}\n"
|
149
|
+
]
|
150
|
+
|
151
|
+
context_lines = [
|
152
|
+
" \t\tfunction() {\n", # different leading whitespace
|
153
|
+
" var x = 1;\n", # different indentation
|
154
|
+
" \treturn x;\n", # normalized whitespace
|
155
|
+
" }\n"
|
156
|
+
]
|
157
|
+
|
158
|
+
# whitespace-tolerant matching should handle this
|
159
|
+
result = find_hunk_start(context_lines, original_lines, fuzzy=False)
|
160
|
+
assert result == 0
|
@@ -32,7 +32,7 @@ REPOS = {
|
|
32
32
|
("astral-sh", "ruff"): ("7fee877", "11dae2c"),
|
33
33
|
("gabrielecirulli", "2048"): ("878098f", "478b6ec"), # adds binary files
|
34
34
|
("mrdoob", "three.js"): ("5f3a718", "b97f111"), # replaces images
|
35
|
-
("myriadrf", "LimeSDR-Mini"): ("0bb75e7", "fb012c8"), # gigantic diffs
|
35
|
+
# ("myriadrf", "LimeSDR-Mini"): ("0bb75e7", "fb012c8"), # gigantic diffs
|
36
36
|
("numpy", "numpy"): ("dca33b3", "5f82966"),
|
37
37
|
("pallets", "click"): ("93c6966", "e11a1ef"),
|
38
38
|
("psf", "black"): ("8d9d18c", "903bef5"), # whole year's worth of changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|