patch-fixer 0.2.3.tar.gz → 0.3.0.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: patch-fixer
3
- Version: 0.2.3
3
+ Version: 0.3.0
4
4
  Summary: Fixes erroneous git apply patches to the best of its ability.
5
5
  Maintainer-email: Alex Mueller <amueller474@gmail.com>
6
6
  License-Expression: Apache-2.0
@@ -69,8 +69,9 @@ cd patch-fixer
69
69
  pip install -e .[test]
70
70
  pytest
71
71
  ```
72
- Note that some test failures are expected as this project is in the early alpha stage.
73
- Please only report test failures if the same test passed in a previous version.
72
+ From version `0.3.0` onward (at least until version `1.0`), some test failures are expected
73
+ in bugfix versions as I like to use test-driven development to build out new features.
74
+ Please only report test failures if the same test passed in the most recent `0.x.0` version.
74
75
 
75
76
  ## License
76
77
 
@@ -41,8 +41,9 @@ cd patch-fixer
41
41
  pip install -e .[test]
42
42
  pytest
43
43
  ```
44
- Note that some test failures are expected as this project is in the early alpha stage.
45
- Please only report test failures if the same test passed in a previous version.
44
+ From version `0.3.0` onward (at least until version `1.0`), some test failures are expected
45
+ in bugfix versions as I like to use test-driven development to build out new features.
46
+ Please only report test failures if the same test passed in the most recent `0.x.0` version.
46
47
 
47
48
  ## License
48
49
 
@@ -6,16 +6,16 @@ from pathlib import Path
6
6
 
7
7
  from git import Repo
8
8
 
9
- path_regex = r'(?:/[A-Za-z0-9_.-]+)*'
9
+ path_regex = r'(?:[A-Za-z0-9_.-]+/?)+'
10
10
  regexes = {
11
- "DIFF_LINE": re.compile(rf'diff --git (a{path_regex}+) (b{path_regex}+)'),
11
+ "DIFF_LINE": re.compile(rf'diff --git (a/{path_regex}) (b/{path_regex})'),
12
12
  "MODE_LINE": re.compile(r'(new|deleted) file mode [0-7]{6}'),
13
13
  "INDEX_LINE": re.compile(r'index [0-9a-f]{7,64}\.\.[0-9a-f]{7,64}(?: [0-7]{6})?|similarity index ([0-9]+)%'),
14
- "BINARY_LINE": re.compile(rf'Binary files (a{path_regex}+|/dev/null) and (b{path_regex}+|/dev/null) differ'),
14
+ "BINARY_LINE": re.compile(rf'Binary files (a/{path_regex}|/dev/null) and (b/{path_regex}|/dev/null) differ'),
15
15
  "RENAME_FROM": re.compile(rf'rename from ({path_regex})'),
16
16
  "RENAME_TO": re.compile(rf'rename to ({path_regex})'),
17
- "FILE_HEADER_START": re.compile(rf'--- (a{path_regex}+|/dev/null)'),
18
- "FILE_HEADER_END": re.compile(rf'\+\+\+ (b{path_regex}+|/dev/null)'),
17
+ "FILE_HEADER_START": re.compile(rf'--- (a/{path_regex}|/dev/null)'),
18
+ "FILE_HEADER_END": re.compile(rf'\+\+\+ (b/{path_regex}|/dev/null)'),
19
19
  "HUNK_HEADER": re.compile(r'^@@ -(\d+),(\d+) \+(\d+),(\d+) @@(.*)$'),
20
20
  "END_LINE": re.compile(r'\\ No newline at end of file')
21
21
  }
@@ -26,12 +26,9 @@ class MissingHunkError(Exception):
26
26
 
27
27
 
28
28
  def normalize_line(line):
29
- if line.startswith('+'):
30
- # safe to normalize new content
31
- return '+' + line[1:].rstrip() + "\n"
32
- else:
33
- # preserve exactly (only normalize line endings)
34
- return line.rstrip("\r\n") + "\n"
29
+ # preserve whitespace, only normalize line endings
30
+ return line.rstrip("\r\n") + "\n"
31
+
35
32
 
36
33
  def find_hunk_start(context_lines, original_lines):
37
34
  """Search original_lines for context_lines and return start line index (0-based)."""
@@ -40,7 +37,8 @@ def find_hunk_start(context_lines, original_lines):
40
37
  if line.startswith(" "):
41
38
  ctx.append(line.lstrip(" "))
42
39
  elif line.startswith("-"):
43
- ctx.append(line.lstrip("-"))
40
+ # can't use lstrip; we want to keep other dashes in the line
41
+ ctx.append(line[1:])
44
42
  elif line.isspace() or line == "":
45
43
  ctx.append(line)
46
44
  if not ctx:
@@ -75,9 +73,9 @@ def reconstruct_file_header(diff_line, header_type):
75
73
  a, b = diff_groups
76
74
  match header_type:
77
75
  case "FILE_HEADER_START":
78
- return f"--- {a}"
76
+ return f"--- {a}\n"
79
77
  case "FILE_HEADER_END":
80
- return f"+++ {b}"
78
+ return f"+++ {b}\n"
81
79
  case _:
82
80
  raise ValueError(f"Unsupported header type: {header_type}")
83
81
 
@@ -87,14 +85,24 @@ def capture_hunk(current_hunk, original_lines, offset, last_hunk, hunk_context):
87
85
  old_count = sum(1 for l in current_hunk if l.startswith((' ', '-')))
88
86
  new_count = sum(1 for l in current_hunk if l.startswith((' ', '+')))
89
87
 
90
- # compute starting line in original file
91
- old_start = find_hunk_start(current_hunk, original_lines) + 1
92
-
93
- # if the line number descends, we either have a bad match or a new file
94
- if old_start < last_hunk:
95
- raise MissingHunkError
88
+ if old_count > 0:
89
+ # compute starting line in original file
90
+ old_start = find_hunk_start(current_hunk, original_lines) + 1
91
+
92
+ # if the line number descends, we either have a bad match or a new file
93
+ if old_start < last_hunk:
94
+ raise MissingHunkError
95
+ else:
96
+ if new_count == 0:
97
+ # complete deletion of remaining content
98
+ new_start = 0
99
+ else:
100
+ new_start = old_start + offset
96
101
  else:
97
- new_start = old_start + offset
102
+ # old count of zero can only mean file creation, since adding lines to
103
+ # an existing file requires surrounding context lines without a +
104
+ old_start = 0
105
+ new_start = 1 # line numbers are 1-indexed in the real world
98
106
 
99
107
  offset += (new_count - old_count)
100
108
 
@@ -122,10 +130,10 @@ def regenerate_index(old_path, new_path, cur_dir):
122
130
  "as this would require manually applying the patch first."
123
131
  )
124
132
 
125
- return f"index {old_sha}..{new_sha}{mode}"
133
+ return f"index {old_sha}..{new_sha}{mode}\n"
126
134
 
127
135
 
128
- def fix_patch(patch_lines, original):
136
+ def fix_patch(patch_lines, original, remove_binary=False):
129
137
  dir_mode = os.path.isdir(original)
130
138
  original_path = Path(original).absolute()
131
139
 
@@ -146,7 +154,10 @@ def fix_patch(patch_lines, original):
146
154
  look_for_rename = False
147
155
  similarity_index = None
148
156
  missing_index = False
157
+ binary_file = False
149
158
  hunk_context = ""
159
+ original_lines = []
160
+ file_loaded = False
150
161
 
151
162
  for i, line in enumerate(patch_lines):
152
163
  match_groups, line_type = match_line(line)
@@ -168,12 +179,14 @@ def fix_patch(patch_lines, original):
168
179
  current_hunk = []
169
180
  a, b = split_ab(match_groups)
170
181
  if a != b:
171
- raise ValueError(f"Diff paths do not match: \n{a}\n{b}")
182
+ look_for_rename = True
172
183
  fixed_lines.append(normalize_line(line))
173
184
  last_diff = i
174
185
  file_start_header = False
175
186
  file_end_header = False
176
187
  first_hunk = True
188
+ binary_file = False
189
+ file_loaded = False
177
190
  case "MODE_LINE":
178
191
  if last_diff != i - 1:
179
192
  raise NotImplementedError("Missing diff line not yet supported")
@@ -188,14 +201,18 @@ def fix_patch(patch_lines, original):
188
201
  fixed_lines.append(normalize_line(line))
189
202
  missing_index = False
190
203
  case "BINARY_LINE":
191
- raise NotImplementedError("Binary files not supported yet")
204
+ if remove_binary:
205
+ raise NotImplementedError("Ignoring binary files not yet supported")
206
+ binary_file = True
207
+ fixed_lines.append(normalize_line(line))
192
208
  case "RENAME_FROM":
193
209
  if not look_for_rename:
194
210
  pass # TODO: handle missing index line
211
+ if binary_file:
212
+ raise NotImplementedError("Renaming binary files not yet supported")
195
213
  if last_index != i - 1:
196
214
  missing_index = True # need this for existence check in RENAME_TO block
197
- similarity_index = 100 # TODO: is this a dangerous assumption?
198
- fixed_index = "similarity index 100%"
215
+ fixed_index = "similarity index 100%\n"
199
216
  fixed_lines.append(normalize_line(fixed_index))
200
217
  last_index = i - 1
201
218
  look_for_rename = False
@@ -204,16 +221,18 @@ def fix_patch(patch_lines, original):
204
221
  offset = 0
205
222
  last_hunk = 0
206
223
  if not Path.exists(current_path):
207
- if similarity_index == 100:
208
- fixed_lines.append(normalize_line(line))
209
- look_for_rename = True
210
- continue
211
- raise NotImplementedError("Parsing files that were both renamed and modified is not yet supported.")
224
+ # TODO: verify whether this block is necessary at all
225
+ fixed_lines.append(normalize_line(line))
226
+ look_for_rename = True
227
+ file_loaded = False
228
+ continue
229
+ if not current_path.is_file():
230
+ raise IsADirectoryError(f"Rename from header points to a directory, not a file: {current_file}")
212
231
  if dir_mode or current_path == original_path:
213
232
  with open(current_path, encoding='utf-8') as f:
214
233
  original_lines = [l.rstrip('\n') for l in f.readlines()]
215
234
  fixed_lines.append(normalize_line(line))
216
- # TODO: analogous boolean to `file_start_header`?
235
+ file_loaded = True
217
236
  else:
218
237
  raise FileNotFoundError(f"Filename {current_file} in `rename from` header does not match argument {original}")
219
238
  case "RENAME_TO":
@@ -223,18 +242,20 @@ def fix_patch(patch_lines, original):
223
242
  last_index = i - 2
224
243
  else:
225
244
  raise NotImplementedError("Missing `rename from` header not yet supported.")
226
- if look_for_rename:
227
- # the old file doesn't exist, so we need to read this one
228
- current_file = match_groups[0]
229
- current_path = Path(current_file).absolute()
230
- with open(current_path, encoding='utf-8') as f:
231
- original_lines = [l.rstrip('\n') for l in f.readlines()]
232
- fixed_lines.append(normalize_line(line))
233
- look_for_rename = False
234
- pass
245
+ # TODO: do something sensible if `look_for_rename` is false
246
+ current_file = match_groups[0]
247
+ current_path = Path(current_file).absolute()
248
+ if current_file and current_path.is_dir():
249
+ raise IsADirectoryError(f"rename to points to a directory, not a file: {current_file}")
250
+ fixed_lines.append(normalize_line(line))
251
+ look_for_rename = False
235
252
  case "FILE_HEADER_START":
236
253
  if look_for_rename:
237
254
  raise NotImplementedError("Replacing file header with rename not yet supported.")
255
+ if binary_file:
256
+ raise NotImplementedError("A header block with both 'binary files differ' and "
257
+ "file start/end headers is a confusing state"
258
+ "\nfrom which there is no obvious way to recover.")
238
259
  if last_index != i - 1:
239
260
  missing_index = True
240
261
  last_index = i - 1
@@ -242,13 +263,15 @@ def fix_patch(patch_lines, original):
242
263
  if current_file and not dir_mode:
243
264
  raise ValueError("Diff references multiple files but only one provided.")
244
265
  current_file = match_groups[0]
245
- offset = 0
246
- last_hunk = 0
266
+ if not file_loaded:
267
+ offset = 0
268
+ last_hunk = 0
247
269
  if current_file == "/dev/null":
248
270
  if last_diff > last_mode:
249
271
  raise NotImplementedError("Missing mode line not yet supported")
250
272
  fixed_lines.append(normalize_line(line))
251
273
  file_start_header = True
274
+ file_loaded = False
252
275
  continue
253
276
  if current_file.startswith("a/"):
254
277
  current_file = current_file[2:]
@@ -257,16 +280,24 @@ def fix_patch(patch_lines, original):
257
280
  current_path = Path(current_file).absolute()
258
281
  if not current_path.exists():
259
282
  raise FileNotFoundError(f"File header start points to non-existent file: {current_file}")
260
- if dir_mode or Path(current_file) == Path(original):
261
- with open(current_file, encoding='utf-8') as f:
262
- original_lines = [l.rstrip('\n') for l in f.readlines()]
263
- fixed_lines.append(normalize_line(line))
264
- file_start_header = True
265
- else:
266
- raise FileNotFoundError(f"Filename {current_file} in header does not match argument {original}")
283
+ if not current_path.is_file():
284
+ raise IsADirectoryError(f"File header start points to a directory, not a file: {current_file}")
285
+ if not file_loaded:
286
+ if dir_mode or Path(current_file) == Path(original):
287
+ with open(current_file, encoding='utf-8') as f:
288
+ original_lines = [l.rstrip('\n') for l in f.readlines()]
289
+ file_loaded = True
290
+ else:
291
+ raise FileNotFoundError(f"Filename {current_file} in header does not match argument {original}")
292
+ fixed_lines.append(normalize_line(line))
293
+ file_start_header = True
267
294
  case "FILE_HEADER_END":
268
295
  if look_for_rename:
269
296
  raise NotImplementedError("Replacing file header with rename not yet supported.")
297
+ if binary_file:
298
+ raise NotImplementedError("A header block with both 'binary files differ' and "
299
+ "file start/end headers is a confusing state"
300
+ "\nfrom which there is no obvious way to recover.")
270
301
  dest_file = match_groups[0]
271
302
  dest_path = Path(dest_file).absolute()
272
303
  if dest_file.startswith("b/"):
@@ -286,37 +317,47 @@ def fix_patch(patch_lines, original):
286
317
  else:
287
318
  # reconstruct file start header based on end header
288
319
  a = match_groups[0].replace("b", "a")
289
- fixed_lines.append(normalize_line(f"--- {a}"))
320
+ fixed_lines.append(normalize_line(f"--- {a}\n"))
290
321
  file_start_header = True
291
322
  elif current_file == "/dev/null":
292
323
  if dest_file == "/dev/null":
293
324
  raise ValueError("File headers cannot both be /dev/null")
294
- elif not dest_path.exists():
295
- raise FileNotFoundError(f"File header end points to non-existent file: {dest_file}")
325
+ elif dest_path.exists():
326
+ raise FileExistsError(f"File header start /dev/null implies file creation, "
327
+ f"but file header end would overwrite existing file: {dest_file}")
296
328
  current_file = dest_file
297
329
  current_path = Path(current_file).absolute()
298
330
  if dir_mode or current_path == original_path:
299
- # TODO: in dir mode, verify that current file exists in original path
300
- with open(current_path, encoding='utf-8') as f:
301
- original_lines = [l.rstrip('\n') for l in f.readlines()]
331
+ original_lines = []
302
332
  fixed_lines.append(normalize_line(line))
303
333
  file_end_header = True
304
334
  else:
305
335
  raise FileNotFoundError(f"Filename {current_file} in header does not match argument {original}")
306
336
  elif dest_file == "/dev/null":
307
- # TODO: check if other modes are possible
308
- if last_mode < last_diff:
309
- last_mode = last_diff + 1
310
- fixed_lines.insert(last_mode, "deleted file mode 100644")
311
- last_index += 1 # index comes after mode
337
+ current_path = Path(current_file).absolute()
338
+ if not current_path.exists():
339
+ raise FileNotFoundError(f"The file being 'deleted' does not exist: {current_file}")
340
+ if last_mode <= last_diff:
341
+ fixed_lines.insert(last_diff + 1, "deleted file mode 100644\n")
342
+ last_index += 1
312
343
  elif "deleted" not in fixed_lines[last_mode]:
313
- fixed_lines[last_mode] = "deleted file mode 100644"
314
- else:
315
- fixed_lines.append("deleted file mode 100644")
344
+ fixed_lines[last_mode] = "deleted file mode 100644\n"
345
+ fixed_lines.append(normalize_line(line))
346
+ file_end_header = True
316
347
  elif current_file != dest_file:
317
- raise ValueError(f"File headers do not match: \n{current_file}\n{dest_file}")
318
- pass
348
+ # this is a rename, original_lines is already set from FILE_HEADER_START
349
+ fixed_lines.append(normalize_line(line))
350
+ file_end_header = True
351
+ first_hunk = True
352
+ else:
353
+ fixed_lines.append(normalize_line(line))
354
+ file_end_header = True
319
355
  case "HUNK_HEADER":
356
+ if binary_file:
357
+ raise ValueError("Binary file can't have a hunk header.")
358
+ if look_for_rename:
359
+ raise ValueError(f"Rename header expected but not found.\n"
360
+ f"Hint: look at lines {last_diff}-{i} of the input patch.")
320
361
  # fix missing file headers before capturing the hunk
321
362
  if not file_end_header:
322
363
  diff_line = patch_lines[last_diff]
@@ -370,7 +411,7 @@ def fix_patch(patch_lines, original):
370
411
  fixed_lines.extend(current_hunk)
371
412
 
372
413
  # if original file didn't end with a newline, strip out the newline here
373
- if not original_lines[-1].endswith("\n"):
414
+ if original_lines and not original_lines[-1].endswith("\n"):
374
415
  fixed_lines[-1] = fixed_lines[-1].rstrip("\n")
375
416
 
376
417
  return fixed_lines
@@ -395,6 +436,7 @@ def main():
395
436
 
396
437
  print(f"Fixed patch written to {output_file}")
397
438
 
439
+
398
440
  if __name__ == "__main__":
399
441
  main()
400
442
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: patch-fixer
3
- Version: 0.2.3
3
+ Version: 0.3.0
4
4
  Summary: Fixes erroneous git apply patches to the best of its ability.
5
5
  Maintainer-email: Alex Mueller <amueller474@gmail.com>
6
6
  License-Expression: Apache-2.0
@@ -69,8 +69,9 @@ cd patch-fixer
69
69
  pip install -e .[test]
70
70
  pytest
71
71
  ```
72
- Note that some test failures are expected as this project is in the early alpha stage.
73
- Please only report test failures if the same test passed in a previous version.
72
+ From version `0.3.0` onward (at least until version `1.0`), some test failures are expected
73
+ in bugfix versions as I like to use test-driven development to build out new features.
74
+ Please only report test failures if the same test passed in the most recent `0.x.0` version.
74
75
 
75
76
  ## License
76
77
 
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "patch-fixer"
7
- version = "0.2.3"
7
+ version = "0.3.0"
8
8
  description = "Fixes erroneous git apply patches to the best of its ability."
9
9
  maintainers = [
10
10
  {name = "Alex Mueller", email="amueller474@gmail.com"},
@@ -36,6 +36,7 @@ REPOS = {
36
36
 
37
37
  CACHE_DIR = Path.home() / ".patch-testing"
38
38
 
39
+
39
40
  class DeletedBranchError(ValueError):
40
41
  def __init__(self, commit_hash):
41
42
  self.commit_hash = commit_hash
@@ -47,7 +48,7 @@ def verify_commit_exists(repo: Repo, commit_hash: str) -> None:
47
48
  try:
48
49
  repo.commit(commit_hash)
49
50
  except ValueError:
50
- # Commit belongs to a deleted branch (let caller handle it)
51
+ # commit belongs to a deleted branch (let caller handle it)
51
52
  raise DeletedBranchError(commit_hash)
52
53
 
53
54
 
@@ -64,13 +65,13 @@ def download_commit_zip(repo_url, commit_hash: str, dest_path: Path) -> None:
64
65
  print(f"Failed to download commit snapshot: {e}")
65
66
  sys.exit(1)
66
67
 
67
- # Extract the zip into dest_path
68
+ # extract the zip into dest_path
68
69
  with zipfile.ZipFile(io.BytesIO(r.content)) as z:
69
70
  # GitHub wraps contents in a top-level folder named like repo-<hash>
70
71
  top_level = z.namelist()[0].split("/")[0]
71
72
  z.extractall(dest_path.parent)
72
73
 
73
- # Move extracted folder to dest_path
74
+ # move extracted folder to dest_path
74
75
  extracted_path = dest_path.parent / top_level
75
76
  if dest_path.exists():
76
77
  shutil.rmtree(dest_path)
@@ -111,7 +112,7 @@ def clone_repos(repo_group, repo_name, old_commit, new_commit):
111
112
  # no sense keeping around an object that points to HEAD
112
113
  repo_new = Repo(repo_new_path)
113
114
 
114
- # Prevent downloading the repo twice if we can help it
115
+ # prevent downloading the repo twice if we can help it
115
116
  shutil.copytree(repo_new_path, repo_old_path)
116
117
  repo_old = Repo(repo_old_path)
117
118
  try:
@@ -137,7 +138,7 @@ def test_integration_equality(repo_group, repo_name, old_commit, new_commit):
137
138
  ) = clone_repos(repo_group, repo_name, old_commit, new_commit)
138
139
 
139
140
  expected = repo_new.git.diff(old_commit, new_commit)
140
- input_lines = expected.splitlines()
141
+ input_lines = expected.splitlines(keepends=True)
141
142
  fixed_lines = fix_patch(input_lines, repo_old_path)
142
143
  actual = "".join(fixed_lines)
143
144
 
File without changes
File without changes