patch-fixer 0.2.3__py3-none-any.whl → 0.3.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -6,16 +6,16 @@ from pathlib import Path
6
6
 
7
7
  from git import Repo
8
8
 
9
- path_regex = r'(?:/[A-Za-z0-9_.-]+)*'
9
+ path_regex = r'(?:[A-Za-z0-9_.-]+/?)+'
10
10
  regexes = {
11
- "DIFF_LINE": re.compile(rf'diff --git (a{path_regex}+) (b{path_regex}+)'),
11
+ "DIFF_LINE": re.compile(rf'diff --git (a/{path_regex}) (b/{path_regex})'),
12
12
  "MODE_LINE": re.compile(r'(new|deleted) file mode [0-7]{6}'),
13
13
  "INDEX_LINE": re.compile(r'index [0-9a-f]{7,64}\.\.[0-9a-f]{7,64}(?: [0-7]{6})?|similarity index ([0-9]+)%'),
14
- "BINARY_LINE": re.compile(rf'Binary files (a{path_regex}+|/dev/null) and (b{path_regex}+|/dev/null) differ'),
14
+ "BINARY_LINE": re.compile(rf'Binary files (a/{path_regex}|/dev/null) and (b/{path_regex}|/dev/null) differ'),
15
15
  "RENAME_FROM": re.compile(rf'rename from ({path_regex})'),
16
16
  "RENAME_TO": re.compile(rf'rename to ({path_regex})'),
17
- "FILE_HEADER_START": re.compile(rf'--- (a{path_regex}+|/dev/null)'),
18
- "FILE_HEADER_END": re.compile(rf'\+\+\+ (b{path_regex}+|/dev/null)'),
17
+ "FILE_HEADER_START": re.compile(rf'--- (a/{path_regex}|/dev/null)'),
18
+ "FILE_HEADER_END": re.compile(rf'\+\+\+ (b/{path_regex}|/dev/null)'),
19
19
  "HUNK_HEADER": re.compile(r'^@@ -(\d+),(\d+) \+(\d+),(\d+) @@(.*)$'),
20
20
  "END_LINE": re.compile(r'\\ No newline at end of file')
21
21
  }
@@ -25,13 +25,41 @@ class MissingHunkError(Exception):
25
25
  pass
26
26
 
27
27
 
28
+ class BadCarriageReturn(ValueError):
29
+ pass
30
+
31
+
28
32
  def normalize_line(line):
29
- if line.startswith('+'):
30
- # safe to normalize new content
31
- return '+' + line[1:].rstrip() + "\n"
33
+ """Normalize line endings while preserving whitespace."""
34
+ if not isinstance(line, str):
35
+ raise TypeError(f"Cannot normalize non-string object {line}")
36
+
37
+ # edge case: empty string
38
+ if line == "":
39
+ return "\n"
40
+
41
+ # special malformed ending: ...\n\r
42
+ if line.endswith("\n\r"):
43
+ raise BadCarriageReturn(f"carriage return after line feed: {line}")
44
+
45
+ # handle CRLF and simple CR/LF endings
46
+ if line.endswith("\r\n"):
47
+ core = line[:-2]
48
+ elif line.endswith("\r"):
49
+ core = line[:-1]
50
+ elif line.endswith("\n"):
51
+ core = line[:-1]
32
52
  else:
33
- # preserve exactly (only normalize line endings)
34
- return line.rstrip("\r\n") + "\n"
53
+ core = line
54
+
55
+ # check for interior CR/LF (anything before the final terminator)
56
+ if "\n" in core:
57
+ raise ValueError(f"line feed in middle of line: {line}")
58
+ if "\r" in core:
59
+ raise BadCarriageReturn(f"carriage return in middle of line: {line}")
60
+
61
+ return core + "\n"
62
+
35
63
 
36
64
  def find_hunk_start(context_lines, original_lines):
37
65
  """Search original_lines for context_lines and return start line index (0-based)."""
@@ -40,7 +68,8 @@ def find_hunk_start(context_lines, original_lines):
40
68
  if line.startswith(" "):
41
69
  ctx.append(line.lstrip(" "))
42
70
  elif line.startswith("-"):
43
- ctx.append(line.lstrip("-"))
71
+ # can't use lstrip; we want to keep other dashes in the line
72
+ ctx.append(line[1:])
44
73
  elif line.isspace() or line == "":
45
74
  ctx.append(line)
46
75
  if not ctx:
@@ -75,9 +104,9 @@ def reconstruct_file_header(diff_line, header_type):
75
104
  a, b = diff_groups
76
105
  match header_type:
77
106
  case "FILE_HEADER_START":
78
- return f"--- {a}"
107
+ return f"--- {a}\n"
79
108
  case "FILE_HEADER_END":
80
- return f"+++ {b}"
109
+ return f"+++ {b}\n"
81
110
  case _:
82
111
  raise ValueError(f"Unsupported header type: {header_type}")
83
112
 
@@ -87,14 +116,24 @@ def capture_hunk(current_hunk, original_lines, offset, last_hunk, hunk_context):
87
116
  old_count = sum(1 for l in current_hunk if l.startswith((' ', '-')))
88
117
  new_count = sum(1 for l in current_hunk if l.startswith((' ', '+')))
89
118
 
90
- # compute starting line in original file
91
- old_start = find_hunk_start(current_hunk, original_lines) + 1
92
-
93
- # if the line number descends, we either have a bad match or a new file
94
- if old_start < last_hunk:
95
- raise MissingHunkError
119
+ if old_count > 0:
120
+ # compute starting line in original file
121
+ old_start = find_hunk_start(current_hunk, original_lines) + 1
122
+
123
+ # if the line number descends, we either have a bad match or a new file
124
+ if old_start < last_hunk:
125
+ raise MissingHunkError
126
+ else:
127
+ if new_count == 0:
128
+ # complete deletion of remaining content
129
+ new_start = 0
130
+ else:
131
+ new_start = old_start + offset
96
132
  else:
97
- new_start = old_start + offset
133
+ # old count of zero can only mean file creation, since adding lines to
134
+ # an existing file requires surrounding context lines without a +
135
+ old_start = 0
136
+ new_start = 1 # line numbers are 1-indexed in the real world
98
137
 
99
138
  offset += (new_count - old_count)
100
139
 
@@ -122,10 +161,10 @@ def regenerate_index(old_path, new_path, cur_dir):
122
161
  "as this would require manually applying the patch first."
123
162
  )
124
163
 
125
- return f"index {old_sha}..{new_sha}{mode}"
164
+ return f"index {old_sha}..{new_sha}{mode}\n"
126
165
 
127
166
 
128
- def fix_patch(patch_lines, original):
167
+ def fix_patch(patch_lines, original, remove_binary=False):
129
168
  dir_mode = os.path.isdir(original)
130
169
  original_path = Path(original).absolute()
131
170
 
@@ -146,7 +185,10 @@ def fix_patch(patch_lines, original):
146
185
  look_for_rename = False
147
186
  similarity_index = None
148
187
  missing_index = False
188
+ binary_file = False
149
189
  hunk_context = ""
190
+ original_lines = []
191
+ file_loaded = False
150
192
 
151
193
  for i, line in enumerate(patch_lines):
152
194
  match_groups, line_type = match_line(line)
@@ -168,12 +210,14 @@ def fix_patch(patch_lines, original):
168
210
  current_hunk = []
169
211
  a, b = split_ab(match_groups)
170
212
  if a != b:
171
- raise ValueError(f"Diff paths do not match: \n{a}\n{b}")
213
+ look_for_rename = True
172
214
  fixed_lines.append(normalize_line(line))
173
215
  last_diff = i
174
216
  file_start_header = False
175
217
  file_end_header = False
176
218
  first_hunk = True
219
+ binary_file = False
220
+ file_loaded = False
177
221
  case "MODE_LINE":
178
222
  if last_diff != i - 1:
179
223
  raise NotImplementedError("Missing diff line not yet supported")
@@ -188,14 +232,18 @@ def fix_patch(patch_lines, original):
188
232
  fixed_lines.append(normalize_line(line))
189
233
  missing_index = False
190
234
  case "BINARY_LINE":
191
- raise NotImplementedError("Binary files not supported yet")
235
+ if remove_binary:
236
+ raise NotImplementedError("Ignoring binary files not yet supported")
237
+ binary_file = True
238
+ fixed_lines.append(normalize_line(line))
192
239
  case "RENAME_FROM":
193
240
  if not look_for_rename:
194
241
  pass # TODO: handle missing index line
242
+ if binary_file:
243
+ raise NotImplementedError("Renaming binary files not yet supported")
195
244
  if last_index != i - 1:
196
245
  missing_index = True # need this for existence check in RENAME_TO block
197
- similarity_index = 100 # TODO: is this a dangerous assumption?
198
- fixed_index = "similarity index 100%"
246
+ fixed_index = "similarity index 100%\n"
199
247
  fixed_lines.append(normalize_line(fixed_index))
200
248
  last_index = i - 1
201
249
  look_for_rename = False
@@ -204,16 +252,18 @@ def fix_patch(patch_lines, original):
204
252
  offset = 0
205
253
  last_hunk = 0
206
254
  if not Path.exists(current_path):
207
- if similarity_index == 100:
208
- fixed_lines.append(normalize_line(line))
209
- look_for_rename = True
210
- continue
211
- raise NotImplementedError("Parsing files that were both renamed and modified is not yet supported.")
255
+ # TODO: verify whether this block is necessary at all
256
+ fixed_lines.append(normalize_line(line))
257
+ look_for_rename = True
258
+ file_loaded = False
259
+ continue
260
+ if not current_path.is_file():
261
+ raise IsADirectoryError(f"Rename from header points to a directory, not a file: {current_file}")
212
262
  if dir_mode or current_path == original_path:
213
263
  with open(current_path, encoding='utf-8') as f:
214
264
  original_lines = [l.rstrip('\n') for l in f.readlines()]
215
265
  fixed_lines.append(normalize_line(line))
216
- # TODO: analogous boolean to `file_start_header`?
266
+ file_loaded = True
217
267
  else:
218
268
  raise FileNotFoundError(f"Filename {current_file} in `rename from` header does not match argument {original}")
219
269
  case "RENAME_TO":
@@ -223,18 +273,20 @@ def fix_patch(patch_lines, original):
223
273
  last_index = i - 2
224
274
  else:
225
275
  raise NotImplementedError("Missing `rename from` header not yet supported.")
226
- if look_for_rename:
227
- # the old file doesn't exist, so we need to read this one
228
- current_file = match_groups[0]
229
- current_path = Path(current_file).absolute()
230
- with open(current_path, encoding='utf-8') as f:
231
- original_lines = [l.rstrip('\n') for l in f.readlines()]
232
- fixed_lines.append(normalize_line(line))
233
- look_for_rename = False
234
- pass
276
+ # TODO: do something sensible if `look_for_rename` is false
277
+ current_file = match_groups[0]
278
+ current_path = Path(current_file).absolute()
279
+ if current_file and current_path.is_dir():
280
+ raise IsADirectoryError(f"rename to points to a directory, not a file: {current_file}")
281
+ fixed_lines.append(normalize_line(line))
282
+ look_for_rename = False
235
283
  case "FILE_HEADER_START":
236
284
  if look_for_rename:
237
285
  raise NotImplementedError("Replacing file header with rename not yet supported.")
286
+ if binary_file:
287
+ raise NotImplementedError("A header block with both 'binary files differ' and "
288
+ "file start/end headers is a confusing state"
289
+ "\nfrom which there is no obvious way to recover.")
238
290
  if last_index != i - 1:
239
291
  missing_index = True
240
292
  last_index = i - 1
@@ -242,13 +294,15 @@ def fix_patch(patch_lines, original):
242
294
  if current_file and not dir_mode:
243
295
  raise ValueError("Diff references multiple files but only one provided.")
244
296
  current_file = match_groups[0]
245
- offset = 0
246
- last_hunk = 0
297
+ if not file_loaded:
298
+ offset = 0
299
+ last_hunk = 0
247
300
  if current_file == "/dev/null":
248
301
  if last_diff > last_mode:
249
302
  raise NotImplementedError("Missing mode line not yet supported")
250
303
  fixed_lines.append(normalize_line(line))
251
304
  file_start_header = True
305
+ file_loaded = False
252
306
  continue
253
307
  if current_file.startswith("a/"):
254
308
  current_file = current_file[2:]
@@ -257,16 +311,24 @@ def fix_patch(patch_lines, original):
257
311
  current_path = Path(current_file).absolute()
258
312
  if not current_path.exists():
259
313
  raise FileNotFoundError(f"File header start points to non-existent file: {current_file}")
260
- if dir_mode or Path(current_file) == Path(original):
261
- with open(current_file, encoding='utf-8') as f:
262
- original_lines = [l.rstrip('\n') for l in f.readlines()]
263
- fixed_lines.append(normalize_line(line))
264
- file_start_header = True
265
- else:
266
- raise FileNotFoundError(f"Filename {current_file} in header does not match argument {original}")
314
+ if not current_path.is_file():
315
+ raise IsADirectoryError(f"File header start points to a directory, not a file: {current_file}")
316
+ if not file_loaded:
317
+ if dir_mode or Path(current_file) == Path(original):
318
+ with open(current_file, encoding='utf-8') as f:
319
+ original_lines = [l.rstrip('\n') for l in f.readlines()]
320
+ file_loaded = True
321
+ else:
322
+ raise FileNotFoundError(f"Filename {current_file} in header does not match argument {original}")
323
+ fixed_lines.append(normalize_line(line))
324
+ file_start_header = True
267
325
  case "FILE_HEADER_END":
268
326
  if look_for_rename:
269
327
  raise NotImplementedError("Replacing file header with rename not yet supported.")
328
+ if binary_file:
329
+ raise NotImplementedError("A header block with both 'binary files differ' and "
330
+ "file start/end headers is a confusing state"
331
+ "\nfrom which there is no obvious way to recover.")
270
332
  dest_file = match_groups[0]
271
333
  dest_path = Path(dest_file).absolute()
272
334
  if dest_file.startswith("b/"):
@@ -286,37 +348,47 @@ def fix_patch(patch_lines, original):
286
348
  else:
287
349
  # reconstruct file start header based on end header
288
350
  a = match_groups[0].replace("b", "a")
289
- fixed_lines.append(normalize_line(f"--- {a}"))
351
+ fixed_lines.append(normalize_line(f"--- {a}\n"))
290
352
  file_start_header = True
291
353
  elif current_file == "/dev/null":
292
354
  if dest_file == "/dev/null":
293
355
  raise ValueError("File headers cannot both be /dev/null")
294
- elif not dest_path.exists():
295
- raise FileNotFoundError(f"File header end points to non-existent file: {dest_file}")
356
+ elif dest_path.exists():
357
+ raise FileExistsError(f"File header start /dev/null implies file creation, "
358
+ f"but file header end would overwrite existing file: {dest_file}")
296
359
  current_file = dest_file
297
360
  current_path = Path(current_file).absolute()
298
361
  if dir_mode or current_path == original_path:
299
- # TODO: in dir mode, verify that current file exists in original path
300
- with open(current_path, encoding='utf-8') as f:
301
- original_lines = [l.rstrip('\n') for l in f.readlines()]
362
+ original_lines = []
302
363
  fixed_lines.append(normalize_line(line))
303
364
  file_end_header = True
304
365
  else:
305
366
  raise FileNotFoundError(f"Filename {current_file} in header does not match argument {original}")
306
367
  elif dest_file == "/dev/null":
307
- # TODO: check if other modes are possible
308
- if last_mode < last_diff:
309
- last_mode = last_diff + 1
310
- fixed_lines.insert(last_mode, "deleted file mode 100644")
311
- last_index += 1 # index comes after mode
368
+ current_path = Path(current_file).absolute()
369
+ if not current_path.exists():
370
+ raise FileNotFoundError(f"The file being 'deleted' does not exist: {current_file}")
371
+ if last_mode <= last_diff:
372
+ fixed_lines.insert(last_diff + 1, "deleted file mode 100644\n")
373
+ last_index += 1
312
374
  elif "deleted" not in fixed_lines[last_mode]:
313
- fixed_lines[last_mode] = "deleted file mode 100644"
314
- else:
315
- fixed_lines.append("deleted file mode 100644")
375
+ fixed_lines[last_mode] = "deleted file mode 100644\n"
376
+ fixed_lines.append(normalize_line(line))
377
+ file_end_header = True
316
378
  elif current_file != dest_file:
317
- raise ValueError(f"File headers do not match: \n{current_file}\n{dest_file}")
318
- pass
379
+ # this is a rename, original_lines is already set from FILE_HEADER_START
380
+ fixed_lines.append(normalize_line(line))
381
+ file_end_header = True
382
+ first_hunk = True
383
+ else:
384
+ fixed_lines.append(normalize_line(line))
385
+ file_end_header = True
319
386
  case "HUNK_HEADER":
387
+ if binary_file:
388
+ raise ValueError("Binary file can't have a hunk header.")
389
+ if look_for_rename:
390
+ raise ValueError(f"Rename header expected but not found.\n"
391
+ f"Hint: look at lines {last_diff}-{i} of the input patch.")
320
392
  # fix missing file headers before capturing the hunk
321
393
  if not file_end_header:
322
394
  diff_line = patch_lines[last_diff]
@@ -370,7 +442,7 @@ def fix_patch(patch_lines, original):
370
442
  fixed_lines.extend(current_hunk)
371
443
 
372
444
  # if original file didn't end with a newline, strip out the newline here
373
- if not original_lines[-1].endswith("\n"):
445
+ if original_lines and not original_lines[-1].endswith("\n"):
374
446
  fixed_lines[-1] = fixed_lines[-1].rstrip("\n")
375
447
 
376
448
  return fixed_lines
@@ -395,6 +467,7 @@ def main():
395
467
 
396
468
  print(f"Fixed patch written to {output_file}")
397
469
 
470
+
398
471
  if __name__ == "__main__":
399
472
  main()
400
473
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: patch-fixer
3
- Version: 0.2.3
3
+ Version: 0.3.1
4
4
  Summary: Fixes erroneous git apply patches to the best of its ability.
5
5
  Maintainer-email: Alex Mueller <amueller474@gmail.com>
6
6
  License-Expression: Apache-2.0
@@ -22,6 +22,7 @@ Description-Content-Type: text/markdown
22
22
  License-File: LICENSE
23
23
  Requires-Dist: GitPython
24
24
  Provides-Extra: test
25
+ Requires-Dist: hypothesis; extra == "test"
25
26
  Requires-Dist: pytest; extra == "test"
26
27
  Requires-Dist: requests; extra == "test"
27
28
  Dynamic: license-file
@@ -69,8 +70,9 @@ cd patch-fixer
69
70
  pip install -e .[test]
70
71
  pytest
71
72
  ```
72
- Note that some test failures are expected as this project is in the early alpha stage.
73
- Please only report test failures if the same test passed in a previous version.
73
+ From version `0.3.0` onward (at least until version `1.0`), some test failures are expected
74
+ in bugfix versions as I like to use test-driven development to build out new features.
75
+ Please only report test failures if the same test existed and passed in the most recent `0.x.0` version.
74
76
 
75
77
  ## License
76
78
 
@@ -0,0 +1,7 @@
1
+ patch_fixer/__init__.py,sha256=bSp2H7JW2kz1WrT0dqlg64kZpklKPp1FZlDhq2XJ2uU,34
2
+ patch_fixer/patch_fixer.py,sha256=1Ny2NwVL4qStbVCrv6QE3b_8oPsFkPomJDLoAGUqNvI,20587
3
+ patch_fixer-0.3.1.dist-info/licenses/LICENSE,sha256=z8d0m5b2O9McPEK1xHG_dWgUBT6EfBDz6wA0F7xSPTA,11358
4
+ patch_fixer-0.3.1.dist-info/METADATA,sha256=iOhEx8d5WJV3X-mMu-0hAjQWXD-gBEPWKkiE-HnNyso,2828
5
+ patch_fixer-0.3.1.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
6
+ patch_fixer-0.3.1.dist-info/top_level.txt,sha256=yyp3KjFgExJsrFsS9ZBCnkhb05xg8hPYhB7ncdpTOv0,12
7
+ patch_fixer-0.3.1.dist-info/RECORD,,
@@ -1,7 +0,0 @@
1
- patch_fixer/__init__.py,sha256=bSp2H7JW2kz1WrT0dqlg64kZpklKPp1FZlDhq2XJ2uU,34
2
- patch_fixer/patch_fixer.py,sha256=zZ48yd9eVfCFiUzJ31wVpcEOBH2Nq711kC2c71q-YzM,17463
3
- patch_fixer-0.2.3.dist-info/licenses/LICENSE,sha256=z8d0m5b2O9McPEK1xHG_dWgUBT6EfBDz6wA0F7xSPTA,11358
4
- patch_fixer-0.2.3.dist-info/METADATA,sha256=iAC5PmTsdyuTXnWPSvCjPVTD3nrEafqCyE8PrMxvuKs,2667
5
- patch_fixer-0.2.3.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
6
- patch_fixer-0.2.3.dist-info/top_level.txt,sha256=yyp3KjFgExJsrFsS9ZBCnkhb05xg8hPYhB7ncdpTOv0,12
7
- patch_fixer-0.2.3.dist-info/RECORD,,