patch-fixer 0.4.0__tar.gz → 0.4.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (21)
  1. {patch_fixer-0.4.0 → patch_fixer-0.4.1}/PKG-INFO +1 -1
  2. {patch_fixer-0.4.0 → patch_fixer-0.4.1}/patch_fixer/patch_fixer.py +47 -46
  3. {patch_fixer-0.4.0 → patch_fixer-0.4.1}/patch_fixer.egg-info/PKG-INFO +1 -1
  4. {patch_fixer-0.4.0 → patch_fixer-0.4.1}/pyproject.toml +1 -1
  5. {patch_fixer-0.4.0 → patch_fixer-0.4.1}/tests/test_hunk_finding.py +0 -17
  6. {patch_fixer-0.4.0 → patch_fixer-0.4.1}/LICENSE +0 -0
  7. {patch_fixer-0.4.0 → patch_fixer-0.4.1}/README.md +0 -0
  8. {patch_fixer-0.4.0 → patch_fixer-0.4.1}/patch_fixer/__init__.py +0 -0
  9. {patch_fixer-0.4.0 → patch_fixer-0.4.1}/patch_fixer/cli.py +0 -0
  10. {patch_fixer-0.4.0 → patch_fixer-0.4.1}/patch_fixer/split.py +0 -0
  11. {patch_fixer-0.4.0 → patch_fixer-0.4.1}/patch_fixer.egg-info/SOURCES.txt +0 -0
  12. {patch_fixer-0.4.0 → patch_fixer-0.4.1}/patch_fixer.egg-info/dependency_links.txt +0 -0
  13. {patch_fixer-0.4.0 → patch_fixer-0.4.1}/patch_fixer.egg-info/entry_points.txt +0 -0
  14. {patch_fixer-0.4.0 → patch_fixer-0.4.1}/patch_fixer.egg-info/requires.txt +0 -0
  15. {patch_fixer-0.4.0 → patch_fixer-0.4.1}/patch_fixer.egg-info/top_level.txt +0 -0
  16. {patch_fixer-0.4.0 → patch_fixer-0.4.1}/setup.cfg +0 -0
  17. {patch_fixer-0.4.0 → patch_fixer-0.4.1}/tests/test_cli.py +0 -0
  18. {patch_fixer-0.4.0 → patch_fixer-0.4.1}/tests/test_fuzzy.py +0 -0
  19. {patch_fixer-0.4.0 → patch_fixer-0.4.1}/tests/test_norm.py +0 -0
  20. {patch_fixer-0.4.0 → patch_fixer-0.4.1}/tests/test_repos.py +0 -0
  21. {patch_fixer-0.4.0 → patch_fixer-0.4.1}/tests/test_split.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: patch-fixer
3
- Version: 0.4.0
3
+ Version: 0.4.1
4
4
  Summary: Fixes erroneous git apply patches to the best of its ability.
5
5
  Maintainer-email: Alex Mueller <amueller474@gmail.com>
6
6
  License-Expression: Apache-2.0
@@ -7,18 +7,18 @@ from pathlib import Path
7
7
 
8
8
  from git import Repo
9
9
 
10
- path_regex = r'(?:[A-Za-z0-9_.-]+/?)+'
10
+ path_regex = r'[^ \n\t]+(?: [^ \n\t]+)*'
11
11
  regexes = {
12
- "DIFF_LINE": re.compile(rf'diff --git (a/{path_regex}) (b/{path_regex})'),
13
- "MODE_LINE": re.compile(r'(new|deleted) file mode [0-7]{6}'),
14
- "INDEX_LINE": re.compile(r'index [0-9a-f]{7,64}\.\.[0-9a-f]{7,64}(?: [0-7]{6})?|similarity index ([0-9]+)%'),
15
- "BINARY_LINE": re.compile(rf'Binary files (a/{path_regex}|/dev/null) and (b/{path_regex}|/dev/null) differ'),
16
- "RENAME_FROM": re.compile(rf'rename from ({path_regex})'),
17
- "RENAME_TO": re.compile(rf'rename to ({path_regex})'),
18
- "FILE_HEADER_START": re.compile(rf'--- (a/{path_regex}|/dev/null)'),
19
- "FILE_HEADER_END": re.compile(rf'\+\+\+ (b/{path_regex}|/dev/null)'),
12
+ "DIFF_LINE": re.compile(rf'^diff --git (a/{path_regex}) (b/{path_regex})$'),
13
+ "MODE_LINE": re.compile(r'^(new|deleted) file mode [0-7]{6}$'),
14
+ "INDEX_LINE": re.compile(r'^index [0-9a-f]{7,64}\.\.[0-9a-f]{7,64}(?: [0-7]{6})?$|^similarity index ([0-9]+)%$'),
15
+ "BINARY_LINE": re.compile(rf'^Binary files (a/{path_regex}|/dev/null) and (b/{path_regex}|/dev/null) differ$'),
16
+ "RENAME_FROM": re.compile(rf'^rename from ({path_regex})$'),
17
+ "RENAME_TO": re.compile(rf'^rename to ({path_regex})$'),
18
+ "FILE_HEADER_START": re.compile(rf'^--- (a/{path_regex}|/dev/null)$'),
19
+ "FILE_HEADER_END": re.compile(rf'^\+\+\+ (b/{path_regex}|/dev/null)$'),
20
20
  "HUNK_HEADER": re.compile(r'^@@ -(\d+)(?:,(\d+))? \+(\d+)(?:,(\d+))? @@(.*)$'),
21
- "END_LINE": re.compile(r'\\ No newline at end of file')
21
+ "END_LINE": re.compile(r'^\\ No newline at end of file$'),
22
22
  }
23
23
 
24
24
 
@@ -62,6 +62,12 @@ class OutOfOrderHunk(HunkErrorBase):
62
62
  f"\nOccurs before previous hunk with header {self.prev_header}")
63
63
 
64
64
 
65
+ class EmptyHunk(Exception):
66
+ # don't inherit from HunkErrorBase since this is a sentinel exception
67
+ # meant to catch the case where the very last hunk is empty
68
+ pass
69
+
70
+
65
71
  class BadCarriageReturn(ValueError):
66
72
  pass
67
73
 
@@ -145,19 +151,6 @@ def find_hunk_start(context_lines, original_lines, fuzzy=False):
145
151
  if all(equal_lines):
146
152
  return i
147
153
 
148
- # try with more flexible whitespace matching
149
- for i in range(len(original_lines) - len(ctx) + 1):
150
- equal_lines = []
151
- for j in range(len(ctx)):
152
- orig_line = original_lines[i + j].strip()
153
- ctx_line = ctx[j].strip()
154
- # normalize whitespace: convert multiple spaces/tabs to single space
155
- orig_normalized = ' '.join(orig_line.split())
156
- ctx_normalized = ' '.join(ctx_line.split())
157
- equal_lines.append(orig_normalized == ctx_normalized)
158
- if all(equal_lines):
159
- return i
160
-
161
154
  # if fuzzy matching is enabled and exact match failed, try fuzzy match
162
155
  if fuzzy:
163
156
  best_match_score = 0.0
@@ -226,9 +219,13 @@ def find_all_hunk_starts(hunk_lines, search_lines, fuzzy=False):
226
219
  def capture_hunk(current_hunk, original_lines, offset, last_hunk, old_header, fuzzy=False):
227
220
  """
228
221
  Try to locate the hunk's true position in the original file.
222
+
229
223
  If multiple possible matches exist, pick the one closest to the expected
230
224
  (possibly corrupted) line number derived from the old hunk header.
231
225
  """
226
+ if not current_hunk:
227
+ raise EmptyHunk
228
+
232
229
  # extract needed info from old header match groups
233
230
  expected_old_start = int(old_header[0]) if old_header else 0
234
231
  try:
@@ -236,11 +233,27 @@ def capture_hunk(current_hunk, original_lines, offset, last_hunk, old_header, fu
236
233
  except IndexError:
237
234
  hunk_context = ""
238
235
 
236
+ # presence or absence of end line shouldn't affect line counts
237
+ if regexes["END_LINE"].match(current_hunk[-1]):
238
+ hunk_len = len(current_hunk) - 1
239
+ else:
240
+ hunk_len = len(current_hunk)
241
+
239
242
  # compute line counts
240
- old_count = sum(1 for l in current_hunk if l.startswith((' ', '-')))
241
- new_count = sum(1 for l in current_hunk if l.startswith((' ', '+')))
243
+ context_count = sum(1 for l in current_hunk if l.startswith(' '))
244
+ minus_count = sum(1 for l in current_hunk if l.startswith('-'))
245
+ plus_count = sum(1 for l in current_hunk if l.startswith('+'))
242
246
 
243
- if old_count > 0:
247
+ old_count = context_count + minus_count
248
+ new_count = context_count + plus_count
249
+
250
+ if minus_count == hunk_len: # file deletion
251
+ old_start = 1
252
+ new_start = 0
253
+ elif plus_count == hunk_len: # file creation
254
+ old_start = 0
255
+ new_start = 1
256
+ else: # file modification
244
257
  search_index = last_hunk
245
258
  search_lines = original_lines[search_index:]
246
259
 
@@ -260,8 +273,10 @@ def capture_hunk(current_hunk, original_lines, offset, last_hunk, old_header, fu
260
273
  # pick first match if no expected line info
261
274
  old_start = candidate_positions[0] + 1
262
275
  else:
263
- # try from start of file as fallback
264
- matches = find_all_hunk_starts(current_hunk, original_lines, fuzzy=fuzzy)
276
+ # try from start of file, excluding lines already searched
277
+ search_index += hunk_len
278
+ search_lines = original_lines[:search_index]
279
+ matches = find_all_hunk_starts(current_hunk, search_lines, fuzzy=fuzzy)
265
280
  if not matches:
266
281
  raise MissingHunkError(current_hunk)
267
282
  if expected_old_start:
@@ -279,11 +294,6 @@ def capture_hunk(current_hunk, original_lines, offset, last_hunk, old_header, fu
279
294
  new_start = 0
280
295
  else:
281
296
  new_start = old_start + offset
282
- else:
283
- # old count of zero can only mean file creation, since adding lines to
284
- # an existing file requires surrounding context lines without a +
285
- old_start = 0
286
- new_start = 1 # line numbers are 1-indexed in the real world
287
297
 
288
298
  offset += (new_count - old_count)
289
299
 
@@ -362,7 +372,6 @@ def fix_patch(patch_lines, original, remove_binary=False, fuzzy=False, add_newli
362
372
  file_start_header = False
363
373
  file_end_header = False
364
374
  look_for_rename = False
365
- similarity_index = None
366
375
  missing_index = False
367
376
  binary_file = False
368
377
  current_hunk_header = ()
@@ -437,15 +446,6 @@ def fix_patch(patch_lines, original, remove_binary=False, fuzzy=False, add_newli
437
446
  current_path = Path(current_file).absolute()
438
447
  offset = 0
439
448
  last_hunk = 0
440
- if not Path.exists(current_path):
441
- # this is meant to handle cases where the source file
442
- # doesn't exist (e.g., when applying a patch that renames
443
- # a file created earlier in the same patch)
444
- # TODO: but really, does that ever happen???
445
- fixed_lines.append(normalize_line(line))
446
- look_for_rename = True
447
- file_loaded = False
448
- continue
449
449
  if not current_path.is_file():
450
450
  raise IsADirectoryError(f"Rename from header points to a directory, not a file: {current_file}")
451
451
  if dir_mode or current_path == original_path:
@@ -462,7 +462,7 @@ def fix_patch(patch_lines, original, remove_binary=False, fuzzy=False, add_newli
462
462
  last_index = i - 2
463
463
  else:
464
464
  raise NotImplementedError("Missing `rename from` header not yet supported.")
465
- if not look_for_rename:
465
+ if not file_loaded:
466
466
  # if we're not looking for a rename but encounter "rename to",
467
467
  # this indicates a malformed patch - log warning but continue
468
468
  warnings.warn(
@@ -632,6 +632,8 @@ def fix_patch(patch_lines, original, remove_binary=False, fuzzy=False, add_newli
632
632
  offset,
633
633
  last_hunk
634
634
  ) = capture_hunk(current_hunk, original_lines, offset, last_hunk, current_hunk_header, fuzzy=fuzzy)
635
+ except EmptyHunk:
636
+ return fixed_lines
635
637
  except (MissingHunkError, OutOfOrderHunk) as e:
636
638
  e.add_file(current_file)
637
639
  raise e
@@ -669,5 +671,4 @@ def main():
669
671
 
670
672
 
671
673
  if __name__ == "__main__":
672
- main()
673
-
674
+ main()
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: patch-fixer
3
- Version: 0.4.0
3
+ Version: 0.4.1
4
4
  Summary: Fixes erroneous git apply patches to the best of its ability.
5
5
  Maintainer-email: Alex Mueller <amueller474@gmail.com>
6
6
  License-Expression: Apache-2.0
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "patch-fixer"
7
- version = "0.4.0"
7
+ version = "0.4.1"
8
8
  description = "Fixes erroneous git apply patches to the best of its ability."
9
9
  maintainers = [
10
10
  {name = "Alex Mueller", email="amueller474@gmail.com"},
@@ -22,23 +22,6 @@ class TestImprovedHunkFinding:
22
22
  expected = " \tcontext line 1\n-\tdeleted line\n \tcontext line 2\n"
23
23
  assert result == expected
24
24
 
25
- def test_whitespace_tolerant_matching(self):
26
- """Test that hunk finding tolerates whitespace differences."""
27
- original_lines = [
28
- "function test() {\n", # multiple spaces
29
- "\t\tvar x = 1;\n", # mixed tabs and spaces
30
- "\t}\n"
31
- ]
32
-
33
- context_lines = [
34
- " function test() {\n", # normalized spaces
35
- " \tvar x = 1;\n", # different whitespace
36
- " }\n"
37
- ]
38
-
39
- result = find_hunk_start(context_lines, original_lines, fuzzy=False)
40
- assert result == 0 # should find match at beginning
41
-
42
25
  def test_exact_match_prioritized(self):
43
26
  """Test that exact matches are found before whitespace-tolerant ones."""
44
27
  original_lines = [
File without changes
File without changes
File without changes