patch-fixer 0.3.1__tar.gz → 0.3.2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: patch-fixer
3
- Version: 0.3.1
3
+ Version: 0.3.2
4
4
  Summary: Fixes erroneous git apply patches to the best of its ability.
5
5
  Maintainer-email: Alex Mueller <amueller474@gmail.com>
6
6
  License-Expression: Apache-2.0
@@ -0,0 +1,2 @@
1
+ from .patch_fixer import fix_patch
2
+ from .split import split_patch
@@ -204,7 +204,7 @@ def fix_patch(patch_lines, original, remove_binary=False):
204
204
  ) = capture_hunk(current_hunk, original_lines, offset, last_hunk, hunk_context)
205
205
  except MissingHunkError:
206
206
  raise NotImplementedError(f"Could not find hunk in {current_file}:"
207
- f"\n\n{"".join(current_hunk)}")
207
+ f"\n\n{''.join(current_hunk)}")
208
208
  fixed_lines.append(fixed_header)
209
209
  fixed_lines.extend(current_hunk)
210
210
  current_hunk = []
@@ -415,7 +415,7 @@ def fix_patch(patch_lines, original, remove_binary=False):
415
415
  ) = capture_hunk(current_hunk, original_lines, offset, last_hunk, hunk_context)
416
416
  except MissingHunkError:
417
417
  raise NotImplementedError(f"Could not find hunk in {current_file}:"
418
- f"\n\n{"".join(current_hunk)}")
418
+ f"\n\n{''.join(current_hunk)}")
419
419
  fixed_lines.append(fixed_header)
420
420
  fixed_lines.extend(current_hunk)
421
421
  current_hunk = []
@@ -437,7 +437,7 @@ def fix_patch(patch_lines, original, remove_binary=False):
437
437
  ) = capture_hunk(current_hunk, original_lines, offset, last_hunk, hunk_context)
438
438
  except MissingHunkError:
439
439
  raise NotImplementedError(f"Could not find hunk in {current_file}:"
440
- f"\n\n{"".join(current_hunk)}")
440
+ f"\n\n{''.join(current_hunk)}")
441
441
  fixed_lines.append(fixed_header)
442
442
  fixed_lines.extend(current_hunk)
443
443
 
@@ -0,0 +1,119 @@
1
+ """
2
+ Idea:
3
+
4
+ 1. main function takes in:
5
+ a. patch file
6
+ b. list of files to split out
7
+ 2. reads patch file, splits based on file headers (assumed to be valid)
8
+ 3. for each file being patched:
9
+ a. if the file is in the list, send its hunks to output 1
10
+ b. otherwise send its hunks to output 2
11
+ c. hunks include all header lines so each output is a valid diff
12
+
13
+ Could share some functionality with refactored, modular version of fix_patch
14
+ """
15
+
16
+ import re
17
+ from typing import List, Tuple
18
+
19
+ from .patch_fixer import match_line, normalize_line, split_ab
20
+
21
+
22
def get_file_path_from_diff(line: str) -> str:
    """
    Return the source-side ('a') path named by a diff header line.

    Parameters
    ----------
    line : str
        A single patch line; it must be classified as ``DIFF_LINE`` by
        ``match_line``.

    Returns
    -------
    str
        The first element of the pair returned by ``split_ab``.

    Raises
    ------
    ValueError
        If ``match_line`` reports any line type other than ``DIFF_LINE``.
    """
    groups, kind = match_line(line)
    if kind == "DIFF_LINE":
        # only the 'a' (source) side identifies the file; the 'b' side is dropped
        source_path, _target_path = split_ab(groups)
        return source_path
    raise ValueError(f"Expected DIFF_LINE but got {kind}")
31
+
32
+
33
def _route_file_block(block_lines, file_path, wanted_paths, included_lines, excluded_lines):
    """Append one file's collected lines to the output selected by its path."""
    destination = included_lines if file_path in wanted_paths else excluded_lines
    destination.extend(block_lines)


def split_patch(patch_lines: List[str], files_to_include: List[str]) -> Tuple[List[str], List[str]]:
    """
    Split a patch into two parts based on a list of files to include.

    Parameters
    ----------
    patch_lines : List[str]
        Lines of the patch file to split.
    files_to_include : List[str]
        File paths to route to the first output. Entries that do not start
        with './' get that prefix added before comparison. A single bare
        string is treated as a one-element list.

    Returns
    -------
    included_lines : List[str]
        Lines for the patch containing only the included files.
    excluded_lines : List[str]
        Lines for the patch containing all other files.

    Notes
    -----
    A file block starts at a line classified as ``DIFF_LINE`` and runs until
    the next such line, so every header and hunk of a file stays together.
    Blocks are matched on the path returned by ``get_file_path_from_diff``
    (the source / 'a' side), so a rename is selected by its old name.
    NOTE(review): this presumes that path is already './'-prefixed; confirm
    against ``split_ab``.

    Lines that appear before the first ``DIFF_LINE`` are copied (normalized)
    into both outputs. If the patch contains no ``DIFF_LINE`` at all, the
    input list itself is returned, un-normalized, as the first output and
    the second output is empty.

    Raises
    ------
    ValueError
        If ``patch_lines`` is empty.
    """
    if not patch_lines:
        raise ValueError("Empty patch provided")

    # a bare string would otherwise be iterated character by character,
    # silently yielding one-letter "paths" that never match anything
    if isinstance(files_to_include, str):
        files_to_include = [files_to_include]

    # normalize file paths to include
    normalized_include = {
        path if path.startswith("./") else f"./{path}"
        for path in files_to_include
    }

    included_lines = []
    excluded_lines = []
    # non-empty exactly when at least one DIFF_LINE has been seen
    current_file_lines = []
    current_file_path = None

    for line in patch_lines:
        _, line_type = match_line(line)

        if line_type == "DIFF_LINE":
            # a new file block begins: hand off the one collected so far
            if current_file_lines:
                _route_file_block(
                    current_file_lines,
                    current_file_path,
                    normalized_include,
                    included_lines,
                    excluded_lines,
                )
            current_file_lines = [normalize_line(line)]
            current_file_path = get_file_path_from_diff(line)

        elif current_file_lines:
            # continue collecting lines for the current file
            current_file_lines.append(normalize_line(line))

        else:
            # lines before any diff (shouldn't happen in well-formed patches);
            # add to both outputs to preserve any global headers
            normalized = normalize_line(line)
            included_lines.append(normalized)
            excluded_lines.append(normalized)

    # patch had no diff lines at all: nothing was split
    if not current_file_lines:
        return patch_lines, []

    # don't forget the last file block
    _route_file_block(
        current_file_lines,
        current_file_path,
        normalized_include,
        included_lines,
        excluded_lines,
    )
    return included_lines, excluded_lines
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: patch-fixer
3
- Version: 0.3.1
3
+ Version: 0.3.2
4
4
  Summary: Fixes erroneous git apply patches to the best of its ability.
5
5
  Maintainer-email: Alex Mueller <amueller474@gmail.com>
6
6
  License-Expression: Apache-2.0
@@ -3,10 +3,12 @@ README.md
3
3
  pyproject.toml
4
4
  patch_fixer/__init__.py
5
5
  patch_fixer/patch_fixer.py
6
+ patch_fixer/split.py
6
7
  patch_fixer.egg-info/PKG-INFO
7
8
  patch_fixer.egg-info/SOURCES.txt
8
9
  patch_fixer.egg-info/dependency_links.txt
9
10
  patch_fixer.egg-info/requires.txt
10
11
  patch_fixer.egg-info/top_level.txt
11
12
  tests/test_norm.py
12
- tests/test_repos.py
13
+ tests/test_repos.py
14
+ tests/test_split.py
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "patch-fixer"
7
- version = "0.3.1"
7
+ version = "0.3.2"
8
8
  description = "Fixes erroneous git apply patches to the best of its ability."
9
9
  maintainers = [
10
10
  {name = "Alex Mueller", email="amueller474@gmail.com"},
@@ -0,0 +1,386 @@
1
+ """Tests for the split_patch functionality."""
2
+
3
+ import pytest
4
+
5
+ from patch_fixer.split import split_patch
6
+
7
+
8
class TestSplitPatch:
    """Behavioural tests for ``split_patch``."""

    def test_simple_split(self):
        """A two-file patch is divided cleanly between the two outputs."""
        diff_lines = [
            "diff --git a/file1.txt b/file1.txt\n",
            "index 1234567..abcdefg 100644\n",
            "--- a/file1.txt\n",
            "+++ b/file1.txt\n",
            "@@ -1,3 +1,3 @@\n",
            " line1\n",
            "-old line\n",
            "+new line\n",
            " line3\n",
            "diff --git a/file2.txt b/file2.txt\n",
            "index 2234567..bbcdefg 100644\n",
            "--- a/file2.txt\n",
            "+++ b/file2.txt\n",
            "@@ -1,2 +1,2 @@\n",
            "-removed\n",
            "+added\n",
        ]

        kept, dropped = split_patch(diff_lines, ["./file1.txt"])
        kept_text = "".join(kept)
        dropped_text = "".join(dropped)

        # file1 lands in the first output
        assert "diff --git a/file1.txt b/file1.txt\n" in kept
        assert "+new line\n" in kept

        # file2 lands in the second output
        assert "diff --git a/file2.txt b/file2.txt\n" in dropped
        assert "+added\n" in dropped

        # neither output mentions the other file
        assert "file2.txt" not in kept_text
        assert "file1.txt" not in dropped_text

    def test_split_with_multiple_includes(self):
        """Several listed files all end up in the first output."""
        diff_lines = [
            "diff --git a/file1.txt b/file1.txt\n",
            "--- a/file1.txt\n",
            "+++ b/file1.txt\n",
            "@@ -1,1 +1,1 @@\n",
            "-old1\n",
            "+new1\n",
            "diff --git a/file2.txt b/file2.txt\n",
            "--- a/file2.txt\n",
            "+++ b/file2.txt\n",
            "@@ -1,1 +1,1 @@\n",
            "-old2\n",
            "+new2\n",
            "diff --git a/file3.txt b/file3.txt\n",
            "--- a/file3.txt\n",
            "+++ b/file3.txt\n",
            "@@ -1,1 +1,1 @@\n",
            "-old3\n",
            "+new3\n",
        ]

        kept, dropped = split_patch(diff_lines, ["./file1.txt", "./file3.txt"])
        kept_text = "".join(kept)
        dropped_text = "".join(dropped)

        # file1 and file3 were requested
        assert "file1.txt" in kept_text
        assert "file3.txt" in kept_text
        assert "+new1\n" in kept
        assert "+new3\n" in kept

        # file2 was not requested
        assert "file2.txt" in dropped_text
        assert "+new2\n" in dropped
        assert "file1.txt" not in dropped_text
        assert "file3.txt" not in dropped_text

    def test_split_with_no_includes(self):
        """An include list that matches nothing leaves the first output empty."""
        diff_lines = [
            "diff --git a/file1.txt b/file1.txt\n",
            "--- a/file1.txt\n",
            "+++ b/file1.txt\n",
            "@@ -1,1 +1,1 @@\n",
            "-old\n",
            "+new\n",
        ]

        kept, dropped = split_patch(diff_lines, ["./nonexistent.txt"])

        # the whole patch goes to the second output
        assert len(kept) == 0
        assert "file1.txt" in "".join(dropped)
        assert "+new\n" in dropped

    def test_split_with_all_includes(self):
        """When every file is listed the second output is empty."""
        diff_lines = [
            "diff --git a/file1.txt b/file1.txt\n",
            "--- a/file1.txt\n",
            "+++ b/file1.txt\n",
            "@@ -1,1 +1,1 @@\n",
            "-old\n",
            "+new\n",
        ]

        kept, dropped = split_patch(diff_lines, ["./file1.txt"])

        # the whole patch goes to the first output
        assert "file1.txt" in "".join(kept)
        assert "+new\n" in kept
        assert len(dropped) == 0

    def test_split_preserves_headers(self):
        """Every header line of an included file survives the split."""
        diff_lines = [
            "diff --git a/file.txt b/file.txt\n",
            "index 1234567..abcdefg 100644\n",
            "new file mode 100644\n",
            "--- /dev/null\n",
            "+++ b/file.txt\n",
            "@@ -0,0 +1,2 @@\n",
            "+new file\n",
            "+content\n",
        ]

        kept, dropped = split_patch(diff_lines, ["./file.txt"])

        # each header line must be present verbatim
        assert "diff --git a/file.txt b/file.txt\n" in kept
        assert "index 1234567..abcdefg 100644\n" in kept
        assert "new file mode 100644\n" in kept
        assert "--- /dev/null\n" in kept
        assert "+++ b/file.txt\n" in kept
        assert "@@ -0,0 +1,2 @@\n" in kept

    def test_split_with_rename(self):
        """A renamed file is selected by its old (source) name."""
        diff_lines = [
            "diff --git a/old.txt b/new.txt\n",
            "similarity index 95%\n",
            "rename from old.txt\n",
            "rename to new.txt\n",
            "index 1234567..abcdefg 100644\n",
            "--- a/old.txt\n",
            "+++ b/new.txt\n",
            "@@ -1,3 +1,3 @@\n",
            " same line\n",
            "-old content\n",
            "+new content\n",
        ]

        # the include list names the source side of the rename
        kept, dropped = split_patch(diff_lines, ["./old.txt"])

        assert "rename from old.txt\n" in kept
        assert "rename to new.txt\n" in kept
        assert len(dropped) == 0

    def test_split_with_binary_files(self):
        """Binary-file blocks are routed like any other file block."""
        diff_lines = [
            "diff --git a/image.png b/image.png\n",
            "index 1234567..abcdefg 100644\n",
            "Binary files a/image.png and b/image.png differ\n",
            "diff --git a/text.txt b/text.txt\n",
            "--- a/text.txt\n",
            "+++ b/text.txt\n",
            "@@ -1,1 +1,1 @@\n",
            "-old\n",
            "+new\n",
        ]

        kept, dropped = split_patch(diff_lines, ["./image.png"])
        dropped_text = "".join(dropped)

        assert "Binary files a/image.png and b/image.png differ\n" in kept
        assert "text.txt" in dropped_text
        assert "image.png" not in dropped_text

    def test_normalization_of_paths(self):
        """Include paths with and without a leading './' behave the same."""
        diff_lines = [
            "diff --git a/file.txt b/file.txt\n",
            "--- a/file.txt\n",
            "+++ b/file.txt\n",
            "@@ -1,1 +1,1 @@\n",
            "-old\n",
            "+new\n",
        ]

        # bare path, no ./ prefix
        kept_bare, dropped_bare = split_patch(diff_lines, ["file.txt"])
        assert "file.txt" in "".join(kept_bare)
        assert len(dropped_bare) == 0

        # explicit ./ prefix
        kept_dotted, dropped_dotted = split_patch(diff_lines, ["./file.txt"])
        assert "file.txt" in "".join(kept_dotted)
        assert len(dropped_dotted) == 0

        # the two spellings must be interchangeable
        assert kept_bare == kept_dotted
        assert dropped_bare == dropped_dotted

    def test_empty_patch(self):
        """An empty patch is rejected with ValueError."""
        with pytest.raises(ValueError, match="Empty patch provided"):
            split_patch([], ["./file.txt"])

    def test_invalid_diff_format(self):
        """Stray lines ahead of the first diff header are copied to both outputs."""
        invalid_patch = [
            "not a valid diff line\n",
            "diff --git a/file.txt b/file.txt\n",
            "--- a/file.txt\n",
            "+++ b/file.txt\n",
            "@@ -1,1 +1,1 @@\n",
            "-old\n",
            "+new\n",
        ]

        # no exception expected: leading non-diff lines are tolerated
        kept, dropped = split_patch(invalid_patch, ["./file.txt"])

        # the stray line is treated as a global header and so appears twice
        assert "not a valid diff line\n" in kept
        assert "not a valid diff line\n" in dropped

    def test_no_files_in_include_list(self):
        """An empty include list sends every file to the second output."""
        diff_lines = [
            "diff --git a/file.txt b/file.txt\n",
            "--- a/file.txt\n",
            "+++ b/file.txt\n",
            "@@ -1,1 +1,1 @@\n",
            "-old\n",
            "+new\n",
        ]

        kept, dropped = split_patch(diff_lines, [])

        # nothing was requested, so nothing is kept
        assert len(kept) == 0
        assert "file.txt" in "".join(dropped)

    def test_patch_with_no_diff_lines(self):
        """Input without any diff header comes back unchanged as the first output."""
        diff_lines = [
            "This is a comment\n",
            "Another comment\n",
        ]

        kept, dropped = split_patch(diff_lines, ["./file.txt"])

        # the lines come back exactly as given
        assert diff_lines == kept
        assert len(dropped) == 0

    def test_multiple_hunks_same_file(self):
        """All hunks belonging to one file travel together."""
        diff_lines = [
            "diff --git a/file.txt b/file.txt\n",
            "--- a/file.txt\n",
            "+++ b/file.txt\n",
            "@@ -1,1 +1,1 @@\n",
            "-old1\n",
            "+new1\n",
            "@@ -10,1 +10,1 @@\n",
            "-old2\n",
            "+new2\n",
            "@@ -20,1 +20,1 @@\n",
            "-old3\n",
            "+new3\n",
        ]

        kept, dropped = split_patch(diff_lines, ["./file.txt"])

        # every hunk of the file is in the first output
        assert "+new1\n" in kept
        assert "+new2\n" in kept
        assert "+new3\n" in kept
        assert len(dropped) == 0

    def test_file_deletion(self):
        """A deleted-file block keeps its deletion headers when included."""
        diff_lines = [
            "diff --git a/deleted.txt b/deleted.txt\n",
            "deleted file mode 100644\n",
            "index 1234567..0000000\n",
            "--- a/deleted.txt\n",
            "+++ /dev/null\n",
            "@@ -1,3 +0,0 @@\n",
            "-line1\n",
            "-line2\n",
            "-line3\n",
        ]

        kept, dropped = split_patch(diff_lines, ["./deleted.txt"])

        assert "deleted file mode 100644\n" in kept
        assert "+++ /dev/null\n" in kept
        assert "-line1\n" in kept
        assert len(dropped) == 0

    def test_file_creation(self):
        """A new-file block keeps its creation headers when included."""
        diff_lines = [
            "diff --git a/new.txt b/new.txt\n",
            "new file mode 100644\n",
            "index 0000000..1234567\n",
            "--- /dev/null\n",
            "+++ b/new.txt\n",
            "@@ -0,0 +1,3 @@\n",
            "+line1\n",
            "+line2\n",
            "+line3\n",
        ]

        kept, dropped = split_patch(diff_lines, ["./new.txt"])

        assert "new file mode 100644\n" in kept
        assert "--- /dev/null\n" in kept
        assert "+line1\n" in kept
        assert len(dropped) == 0

    def test_complex_patch(self):
        """Modify, create, delete and rename blocks are each routed correctly."""
        diff_lines = [
            "diff --git a/modified.txt b/modified.txt\n",
            "index 1234567..abcdefg 100644\n",
            "--- a/modified.txt\n",
            "+++ b/modified.txt\n",
            "@@ -1,1 +1,1 @@\n",
            "-old\n",
            "+new\n",
            "diff --git a/created.txt b/created.txt\n",
            "new file mode 100644\n",
            "index 0000000..2234567\n",
            "--- /dev/null\n",
            "+++ b/created.txt\n",
            "@@ -0,0 +1,1 @@\n",
            "+created content\n",
            "diff --git a/deleted.txt b/deleted.txt\n",
            "deleted file mode 100644\n",
            "index 3234567..0000000\n",
            "--- a/deleted.txt\n",
            "+++ /dev/null\n",
            "@@ -1,1 +0,0 @@\n",
            "-deleted content\n",
            "diff --git a/renamed_old.txt b/renamed_new.txt\n",
            "similarity index 90%\n",
            "rename from renamed_old.txt\n",
            "rename to renamed_new.txt\n",
            "index 4234567..5234567\n",
            "--- a/renamed_old.txt\n",
            "+++ b/renamed_new.txt\n",
            "@@ -1,1 +1,1 @@\n",
            "-before rename\n",
            "+after rename\n",
        ]

        # request the modified file and the renamed file (by its old name)
        kept, dropped = split_patch(diff_lines, ["./modified.txt", "./renamed_old.txt"])
        kept_text = "".join(kept)
        dropped_text = "".join(dropped)

        # first output: modified and renamed
        assert "modified.txt" in kept_text
        assert "renamed_old.txt" in kept_text
        assert "+new\n" in kept
        assert "+after rename\n" in kept

        # second output: created and deleted
        assert "created.txt" in dropped_text
        assert "deleted.txt" in dropped_text
        assert "+created content\n" in dropped
        assert "-deleted content\n" in dropped

        # nothing leaked across the split
        assert "created.txt" not in kept_text
        assert "deleted.txt" not in kept_text
        assert "modified.txt" not in dropped_text
        assert "renamed" not in dropped_text
@@ -1 +0,0 @@
1
- from .patch_fixer import fix_patch
File without changes
File without changes
File without changes