patch-fixer 0.3.3__py3-none-any.whl → 0.3.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
patch_fixer/cli.py CHANGED
@@ -14,7 +14,12 @@ def fix_command(args):
14
14
  with open(args.broken_patch, encoding='utf-8') as f:
15
15
  patch_lines = f.readlines()
16
16
 
17
- fixed_lines = fix_patch(patch_lines, args.original)
17
+ fixed_lines = fix_patch(
18
+ patch_lines,
19
+ args.original,
20
+ fuzzy=args.fuzzy,
21
+ add_newline=args.add_newline
22
+ )
18
23
 
19
24
  with open(args.output, 'w', encoding='utf-8') as f:
20
25
  f.writelines(fixed_lines)
@@ -77,6 +82,16 @@ def main():
77
82
  'output',
78
83
  help='Path where the fixed patch will be written'
79
84
  )
85
+ fix_parser.add_argument(
86
+ '--fuzzy',
87
+ action='store_true',
88
+ help='Enable fuzzy string matching when finding hunks in original files'
89
+ )
90
+ fix_parser.add_argument(
91
+ '--add-newline',
92
+ action='store_true',
93
+ help='Add final newline when processing "No newline at end of file" markers'
94
+ )
80
95
 
81
96
  # split command
82
97
  split_parser = subparsers.add_parser(
@@ -2,6 +2,7 @@
2
2
  import os
3
3
  import re
4
4
  import sys
5
+ import warnings
5
6
  from pathlib import Path
6
7
 
7
8
  from git import Repo
@@ -61,7 +62,29 @@ def normalize_line(line):
61
62
  return core + "\n"
62
63
 
63
64
 
64
- def find_hunk_start(context_lines, original_lines):
65
+ def fuzzy_line_similarity(line1, line2, threshold=0.8):
66
+ """Calculate similarity between two lines using a simple ratio."""
67
+ if not line1 or not line2:
68
+ return 0.0
69
+
70
+ l1, l2 = line1.strip(), line2.strip()
71
+
72
+ if l1 == l2:
73
+ return 1.0
74
+
75
+ if len(l1) == 0 or len(l2) == 0:
76
+ return 0.0
77
+
78
+ # count common characters
79
+ common = 0
80
+ for char in set(l1) & set(l2):
81
+ common += min(l1.count(char), l2.count(char))
82
+
83
+ total_chars = len(l1) + len(l2)
84
+ return (2.0 * common) / total_chars if total_chars > 0 else 0.0
85
+
86
+
87
+ def find_hunk_start(context_lines, original_lines, fuzzy=False):
65
88
  """Search original_lines for context_lines and return start line index (0-based)."""
66
89
  ctx = []
67
90
  for line in context_lines:
@@ -74,11 +97,33 @@ def find_hunk_start(context_lines, original_lines):
74
97
  ctx.append(line)
75
98
  if not ctx:
76
99
  raise ValueError("Cannot search for empty hunk.")
100
+
101
+ # first try exact matching
77
102
  for i in range(len(original_lines) - len(ctx) + 1):
78
103
  # this part will fail if the diff is malformed beyond hunk header
79
- equal_lines = [original_lines[i+j].strip() == ctx[j].strip() for j in range(len(ctx))]
104
+ equal_lines = [original_lines[i + j].strip() == ctx[j].strip() for j in range(len(ctx))]
80
105
  if all(equal_lines):
81
106
  return i
107
+
108
+ # if fuzzy matching is enabled and exact match failed, try fuzzy match
109
+ if fuzzy:
110
+ best_match_score = 0.0
111
+ best_match_pos = 0
112
+
113
+ for i in range(len(original_lines) - len(ctx) + 1):
114
+ total_similarity = 0.0
115
+ for j in range(len(ctx)):
116
+ similarity = fuzzy_line_similarity(original_lines[i + j], ctx[j])
117
+ total_similarity += similarity
118
+
119
+ avg_similarity = total_similarity / len(ctx)
120
+ if avg_similarity > best_match_score and avg_similarity > 0.6:
121
+ best_match_score = avg_similarity
122
+ best_match_pos = i
123
+
124
+ if best_match_score > 0.6:
125
+ return best_match_pos
126
+
82
127
  return 0
83
128
 
84
129
 
@@ -111,14 +156,14 @@ def reconstruct_file_header(diff_line, header_type):
111
156
  raise ValueError(f"Unsupported header type: {header_type}")
112
157
 
113
158
 
114
- def capture_hunk(current_hunk, original_lines, offset, last_hunk, hunk_context):
159
+ def capture_hunk(current_hunk, original_lines, offset, last_hunk, hunk_context, fuzzy=False):
115
160
  # compute line counts
116
161
  old_count = sum(1 for l in current_hunk if l.startswith((' ', '-')))
117
162
  new_count = sum(1 for l in current_hunk if l.startswith((' ', '+')))
118
163
 
119
164
  if old_count > 0:
120
165
  # compute starting line in original file
121
- old_start = find_hunk_start(current_hunk, original_lines) + 1
166
+ old_start = find_hunk_start(current_hunk, original_lines, fuzzy=fuzzy) + 1
122
167
 
123
168
  # if the line number descends, we either have a bad match or a new file
124
169
  if old_start < last_hunk:
@@ -147,7 +192,11 @@ def capture_hunk(current_hunk, original_lines, offset, last_hunk, hunk_context):
147
192
 
148
193
  def regenerate_index(old_path, new_path, cur_dir):
149
194
  repo = Repo(cur_dir)
150
- mode = " 100644" # TODO: check if mode can be a different number
195
+
196
+ # Common git file modes: 100644 (regular file), 100755 (executable file),
197
+ # 120000 (symbolic link), 160000 (submodule), 040000 (tree/directory)
198
+ # TODO: guess mode based on above information
199
+ mode = " 100644"
151
200
 
152
201
  # file deletion
153
202
  if new_path == "/dev/null":
@@ -164,12 +213,15 @@ def regenerate_index(old_path, new_path, cur_dir):
164
213
  return f"index {old_sha}..{new_sha}{mode}\n"
165
214
 
166
215
 
167
- def fix_patch(patch_lines, original, remove_binary=False):
216
+ def fix_patch(patch_lines, original, remove_binary=False, fuzzy=False, add_newline=False):
168
217
  dir_mode = os.path.isdir(original)
169
218
  original_path = Path(original).absolute()
170
219
 
171
220
  # make relative paths in the diff work
172
- os.chdir(original_path)
221
+ if dir_mode:
222
+ os.chdir(original_path)
223
+ else:
224
+ os.chdir(original_path.parent)
173
225
 
174
226
  fixed_lines = []
175
227
  current_hunk = []
@@ -201,7 +253,7 @@ def fix_patch(patch_lines, original, remove_binary=False):
201
253
  fixed_header,
202
254
  offset,
203
255
  last_hunk
204
- ) = capture_hunk(current_hunk, original_lines, offset, last_hunk, hunk_context)
256
+ ) = capture_hunk(current_hunk, original_lines, offset, last_hunk, hunk_context, fuzzy=fuzzy)
205
257
  except MissingHunkError:
206
258
  raise NotImplementedError(f"Could not find hunk in {current_file}:"
207
259
  f"\n\n{''.join(current_hunk)}")
@@ -224,7 +276,12 @@ def fix_patch(patch_lines, original, remove_binary=False):
224
276
  last_mode = i
225
277
  fixed_lines.append(normalize_line(line))
226
278
  case "INDEX_LINE":
227
- # TODO: verify that mode is present for anything but deletion
279
+ # mode should be present in index line for all operations except file deletion
280
+ # for deletions, the mode is omitted since the file no longer exists
281
+ index_line = normalize_line(line).strip()
282
+ if not index_line.endswith("..0000000") and not re.search(r' [0-7]{6}$', index_line):
283
+ # TODO: this is the right idea, but a poor implementation
284
+ pass
228
285
  last_index = i
229
286
  similarity_index = match_groups[0]
230
287
  if similarity_index:
@@ -238,7 +295,9 @@ def fix_patch(patch_lines, original, remove_binary=False):
238
295
  fixed_lines.append(normalize_line(line))
239
296
  case "RENAME_FROM":
240
297
  if not look_for_rename:
241
- pass # TODO: handle missing index line
298
+ # handle case where rename from appears without corresponding index line
299
+ # this may indicate a malformed patch, but we can try to continue
300
+ warnings.warn(f"Warning: 'rename from' found without expected index line at line {i+1}")
242
301
  if binary_file:
243
302
  raise NotImplementedError("Renaming binary files not yet supported")
244
303
  if last_index != i - 1:
@@ -252,7 +311,10 @@ def fix_patch(patch_lines, original, remove_binary=False):
252
311
  offset = 0
253
312
  last_hunk = 0
254
313
  if not Path.exists(current_path):
255
- # TODO: verify whether this block is necessary at all
314
+ # this is meant to handle cases where the source file
315
+ # doesn't exist (e.g., when applying a patch that renames
316
+ # a file created earlier in the same patch)
317
+ # TODO: but really, does that ever happen???
256
318
  fixed_lines.append(normalize_line(line))
257
319
  look_for_rename = True
258
320
  file_loaded = False
@@ -273,7 +335,12 @@ def fix_patch(patch_lines, original, remove_binary=False):
273
335
  last_index = i - 2
274
336
  else:
275
337
  raise NotImplementedError("Missing `rename from` header not yet supported.")
276
- # TODO: do something sensible if `look_for_rename` is false
338
+ if not look_for_rename:
339
+ # if we're not looking for a rename but encounter "rename to",
340
+ # this indicates a malformed patch - log warning but continue
341
+ warnings.warn(
342
+ f"Warning: unexpected 'rename to' found at line {i + 1} without corresponding 'rename from'"
343
+ )
277
344
  current_file = match_groups[0]
278
345
  current_path = Path(current_file).absolute()
279
346
  if current_file and current_path.is_dir():
@@ -412,7 +479,7 @@ def fix_patch(patch_lines, original, remove_binary=False):
412
479
  fixed_header,
413
480
  offset,
414
481
  last_hunk
415
- ) = capture_hunk(current_hunk, original_lines, offset, last_hunk, hunk_context)
482
+ ) = capture_hunk(current_hunk, original_lines, offset, last_hunk, hunk_context, fuzzy=fuzzy)
416
483
  except MissingHunkError:
417
484
  raise NotImplementedError(f"Could not find hunk in {current_file}:"
418
485
  f"\n\n{''.join(current_hunk)}")
@@ -421,10 +488,13 @@ def fix_patch(patch_lines, original, remove_binary=False):
421
488
  current_hunk = []
422
489
  hunk_context = match_groups[4]
423
490
  case "END_LINE":
424
- # TODO: add newline at end of file if user requests
425
- fixed_lines.append(normalize_line(line))
491
+ # if user requested, add a newline at end of file when this marker is present
492
+ if add_newline:
493
+ fixed_lines.append("\n")
494
+ else:
495
+ fixed_lines.append(normalize_line(line))
426
496
  case _:
427
- # TODO: fuzzy string matching
497
+ # TODO: fix fuzzy string matching to be less granular
428
498
  # this is a normal line, add to current hunk
429
499
  current_hunk.append(normalize_line(line))
430
500
 
@@ -434,7 +504,7 @@ def fix_patch(patch_lines, original, remove_binary=False):
434
504
  fixed_header,
435
505
  offset,
436
506
  last_hunk
437
- ) = capture_hunk(current_hunk, original_lines, offset, last_hunk, hunk_context)
507
+ ) = capture_hunk(current_hunk, original_lines, offset, last_hunk, hunk_context, fuzzy=fuzzy)
438
508
  except MissingHunkError:
439
509
  raise NotImplementedError(f"Could not find hunk in {current_file}:"
440
510
  f"\n\n{''.join(current_hunk)}")
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: patch-fixer
3
- Version: 0.3.3
3
+ Version: 0.3.4
4
4
  Summary: Fixes erroneous git apply patches to the best of its ability.
5
5
  Maintainer-email: Alex Mueller <amueller474@gmail.com>
6
6
  License-Expression: Apache-2.0
@@ -107,6 +107,13 @@ with open("excluded.patch", 'w', encoding='utf-8') as f:
107
107
  f.writelines(excluded)
108
108
  ```
109
109
 
110
+ ## Known Limitations
111
+
112
+ - When fixing patches with missing `index` lines, the tool requires the files to be in a git repository to regenerate the index. This is only needed for file deletions and renames.
113
+ - `patch-fixer` assumes the patch follows git's unified diff format.
114
  - Current implementation is not very robust to corrupted hunk content.
115
  - Much more comprehensive fuzzy string matching is planned.
116
+
110
117
  ## Local Testing
111
118
  ```bash
112
119
  git clone https://github.com/ajcm474/patch-fixer.git
@@ -0,0 +1,10 @@
1
+ patch_fixer/__init__.py,sha256=n5DDMr4jbO3epK3ybBvjDyRddTWlWamN6ao5BC7xHFo,65
2
+ patch_fixer/cli.py,sha256=4zy02FsVrUrcQzsBwQ58PVfJXoG4OsOYKpk2JXGw1cY,3841
3
+ patch_fixer/patch_fixer.py,sha256=eqrqe6jKlEWiCjOiLiFnq9oPi1HZPrZBSEsCcEANeFw,23478
4
+ patch_fixer/split.py,sha256=l0rHM6-ZBuB9Iv6Ng6rxqZH5eKfvk2t87j__nDu67kM,3869
5
+ patch_fixer-0.3.4.dist-info/licenses/LICENSE,sha256=z8d0m5b2O9McPEK1xHG_dWgUBT6EfBDz6wA0F7xSPTA,11358
6
+ patch_fixer-0.3.4.dist-info/METADATA,sha256=cV7wioKTFQulrTUB9R_s_lDfDNJDYfwEp3uSho2fqXc,4521
7
+ patch_fixer-0.3.4.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
8
+ patch_fixer-0.3.4.dist-info/entry_points.txt,sha256=ftc6dP6B1zJouSPeCCJLZtx-EEGVSrNEwy4YhtnEoxA,53
9
+ patch_fixer-0.3.4.dist-info/top_level.txt,sha256=yyp3KjFgExJsrFsS9ZBCnkhb05xg8hPYhB7ncdpTOv0,12
10
+ patch_fixer-0.3.4.dist-info/RECORD,,
@@ -1,10 +0,0 @@
1
- patch_fixer/__init__.py,sha256=n5DDMr4jbO3epK3ybBvjDyRddTWlWamN6ao5BC7xHFo,65
2
- patch_fixer/cli.py,sha256=hgneS8DSCWBxv1l0u37n60FPGskAsDgodX9YxKgF-H0,3417
3
- patch_fixer/patch_fixer.py,sha256=GAavb15H5cEoNFgGlO5hIY7EOF88VCsjHcLrfyGW4_0,20587
4
- patch_fixer/split.py,sha256=l0rHM6-ZBuB9Iv6Ng6rxqZH5eKfvk2t87j__nDu67kM,3869
5
- patch_fixer-0.3.3.dist-info/licenses/LICENSE,sha256=z8d0m5b2O9McPEK1xHG_dWgUBT6EfBDz6wA0F7xSPTA,11358
6
- patch_fixer-0.3.3.dist-info/METADATA,sha256=URfv2ws8naMvx3t8xsqN5JyGBlC38EydFYPwgivTnvQ,4117
7
- patch_fixer-0.3.3.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
8
- patch_fixer-0.3.3.dist-info/entry_points.txt,sha256=ftc6dP6B1zJouSPeCCJLZtx-EEGVSrNEwy4YhtnEoxA,53
9
- patch_fixer-0.3.3.dist-info/top_level.txt,sha256=yyp3KjFgExJsrFsS9ZBCnkhb05xg8hPYhB7ncdpTOv0,12
10
- patch_fixer-0.3.3.dist-info/RECORD,,