patch-fixer 0.2.3__tar.gz → 0.3.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: patch-fixer
3
- Version: 0.2.3
3
+ Version: 0.3.1
4
4
  Summary: Fixes erroneous git apply patches to the best of its ability.
5
5
  Maintainer-email: Alex Mueller <amueller474@gmail.com>
6
6
  License-Expression: Apache-2.0
@@ -22,6 +22,7 @@ Description-Content-Type: text/markdown
22
22
  License-File: LICENSE
23
23
  Requires-Dist: GitPython
24
24
  Provides-Extra: test
25
+ Requires-Dist: hypothesis; extra == "test"
25
26
  Requires-Dist: pytest; extra == "test"
26
27
  Requires-Dist: requests; extra == "test"
27
28
  Dynamic: license-file
@@ -69,8 +70,9 @@ cd patch-fixer
69
70
  pip install -e .[test]
70
71
  pytest
71
72
  ```
72
- Note that some test failures are expected as this project is in the early alpha stage.
73
- Please only report test failures if the same test passed in a previous version.
73
+ From version `0.3.0` onward (at least until version `1.0`), some test failures are expected
74
+ in bugfix versions as I like to use test-driven development to build out new features.
75
+ Please only report test failures if the same test existed and passed in the most recent `0.x.0` version.
74
76
 
75
77
  ## License
76
78
 
@@ -41,8 +41,9 @@ cd patch-fixer
41
41
  pip install -e .[test]
42
42
  pytest
43
43
  ```
44
- Note that some test failures are expected as this project is in the early alpha stage.
45
- Please only report test failures if the same test passed in a previous version.
44
+ From version `0.3.0` onward (at least until version `1.0`), some test failures are expected
45
+ in bugfix versions as I like to use test-driven development to build out new features.
46
+ Please only report test failures if the same test existed and passed in the most recent `0.x.0` version.
46
47
 
47
48
  ## License
48
49
 
@@ -6,16 +6,16 @@ from pathlib import Path
6
6
 
7
7
  from git import Repo
8
8
 
9
- path_regex = r'(?:/[A-Za-z0-9_.-]+)*'
9
+ path_regex = r'(?:[A-Za-z0-9_.-]+/?)+'
10
10
  regexes = {
11
- "DIFF_LINE": re.compile(rf'diff --git (a{path_regex}+) (b{path_regex}+)'),
11
+ "DIFF_LINE": re.compile(rf'diff --git (a/{path_regex}) (b/{path_regex})'),
12
12
  "MODE_LINE": re.compile(r'(new|deleted) file mode [0-7]{6}'),
13
13
  "INDEX_LINE": re.compile(r'index [0-9a-f]{7,64}\.\.[0-9a-f]{7,64}(?: [0-7]{6})?|similarity index ([0-9]+)%'),
14
- "BINARY_LINE": re.compile(rf'Binary files (a{path_regex}+|/dev/null) and (b{path_regex}+|/dev/null) differ'),
14
+ "BINARY_LINE": re.compile(rf'Binary files (a/{path_regex}|/dev/null) and (b/{path_regex}|/dev/null) differ'),
15
15
  "RENAME_FROM": re.compile(rf'rename from ({path_regex})'),
16
16
  "RENAME_TO": re.compile(rf'rename to ({path_regex})'),
17
- "FILE_HEADER_START": re.compile(rf'--- (a{path_regex}+|/dev/null)'),
18
- "FILE_HEADER_END": re.compile(rf'\+\+\+ (b{path_regex}+|/dev/null)'),
17
+ "FILE_HEADER_START": re.compile(rf'--- (a/{path_regex}|/dev/null)'),
18
+ "FILE_HEADER_END": re.compile(rf'\+\+\+ (b/{path_regex}|/dev/null)'),
19
19
  "HUNK_HEADER": re.compile(r'^@@ -(\d+),(\d+) \+(\d+),(\d+) @@(.*)$'),
20
20
  "END_LINE": re.compile(r'\')
21
21
  }
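
As an aside on the regex change above, here is a minimal sketch of what the revised `path_regex` now matches; the sample `diff --git` line is made up for illustration and is not taken from the package's tests.

```python
import re

# same pattern shape as the new path_regex above; the example path is illustrative
path_regex = r'(?:[A-Za-z0-9_.-]+/?)+'
diff_line = re.compile(rf'diff --git (a/{path_regex}) (b/{path_regex})')

m = diff_line.match("diff --git a/src/example.py b/src/example.py")
print(m.groups())  # ('a/src/example.py', 'b/src/example.py')
```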
@@ -25,13 +25,41 @@ class MissingHunkError(Exception):
25
25
  pass
26
26
 
27
27
 
28
+ class BadCarriageReturn(ValueError):
29
+ pass
30
+
31
+
28
32
  def normalize_line(line):
29
- if line.startswith('+'):
30
- # safe to normalize new content
31
- return '+' + line[1:].rstrip() + "\n"
33
+ """Normalize line endings while preserving whitespace."""
34
+ if not isinstance(line, str):
35
+ raise TypeError(f"Cannot normalize non-string object {line}")
36
+
37
+ # edge case: empty string
38
+ if line == "":
39
+ return "\n"
40
+
41
+ # special malformed ending: ...\n\r
42
+ if line.endswith("\n\r"):
43
+ raise BadCarriageReturn(f"carriage return after line feed: {line}")
44
+
45
+ # handle CRLF and simple CR/LF endings
46
+ if line.endswith("\r\n"):
47
+ core = line[:-2]
48
+ elif line.endswith("\r"):
49
+ core = line[:-1]
50
+ elif line.endswith("\n"):
51
+ core = line[:-1]
32
52
  else:
33
- # preserve exactly (only normalize line endings)
34
- return line.rstrip("\r\n") + "\n"
53
+ core = line
54
+
55
+ # check for interior CR/LF (anything before the final terminator)
56
+ if "\n" in core:
57
+ raise ValueError(f"line feed in middle of line: {line}")
58
+ if "\r" in core:
59
+ raise BadCarriageReturn(f"carriage return in middle of line: {line}")
60
+
61
+ return core + "\n"
62
+
35
63
 
36
64
  def find_hunk_start(context_lines, original_lines):
37
65
  """Search original_lines for context_lines and return start line index (0-based)."""
@@ -40,7 +68,8 @@ def find_hunk_start(context_lines, original_lines):
40
68
  if line.startswith(" "):
41
69
  ctx.append(line.lstrip(" "))
42
70
  elif line.startswith("-"):
43
- ctx.append(line.lstrip("-"))
71
+ # can't use lstrip; we want to keep other dashes in the line
72
+ ctx.append(line[1:])
44
73
  elif line.isspace() or line == "":
45
74
  ctx.append(line)
46
75
  if not ctx:
@@ -75,9 +104,9 @@ def reconstruct_file_header(diff_line, header_type):
75
104
  a, b = diff_groups
76
105
  match header_type:
77
106
  case "FILE_HEADER_START":
78
- return f"--- {a}"
107
+ return f"--- {a}\n"
79
108
  case "FILE_HEADER_END":
80
- return f"+++ {b}"
109
+ return f"+++ {b}\n"
81
110
  case _:
82
111
  raise ValueError(f"Unsupported header type: {header_type}")
83
112
 
@@ -87,14 +116,24 @@ def capture_hunk(current_hunk, original_lines, offset, last_hunk, hunk_context):
87
116
  old_count = sum(1 for l in current_hunk if l.startswith((' ', '-')))
88
117
  new_count = sum(1 for l in current_hunk if l.startswith((' ', '+')))
89
118
 
90
- # compute starting line in original file
91
- old_start = find_hunk_start(current_hunk, original_lines) + 1
92
-
93
- # if the line number descends, we either have a bad match or a new file
94
- if old_start < last_hunk:
95
- raise MissingHunkError
119
+ if old_count > 0:
120
+ # compute starting line in original file
121
+ old_start = find_hunk_start(current_hunk, original_lines) + 1
122
+
123
+ # if the line number descends, we either have a bad match or a new file
124
+ if old_start < last_hunk:
125
+ raise MissingHunkError
126
+ else:
127
+ if new_count == 0:
128
+ # complete deletion of remaining content
129
+ new_start = 0
130
+ else:
131
+ new_start = old_start + offset
96
132
  else:
97
- new_start = old_start + offset
133
+ # old count of zero can only mean file creation, since adding lines to
134
+ # an existing file requires surrounding context lines without a +
135
+ old_start = 0
136
+ new_start = 1 # line numbers are 1-indexed in the real world
98
137
 
99
138
  offset += (new_count - old_count)
100
139
 
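
As a side note on the counting above, a minimal sketch (with a made-up hunk body) of how `old_count` and `new_count` feed the rebuilt `@@` header:

```python
# hypothetical hunk body; lines keep their leading ' ', '-', '+' markers
current_hunk = [
    " unchanged line\n",
    "-removed line\n",
    "+added line one\n",
    "+added line two\n",
]

old_count = sum(1 for l in current_hunk if l.startswith((' ', '-')))  # 2
new_count = sum(1 for l in current_hunk if l.startswith((' ', '+')))  # 3

# with old_start located via find_hunk_start() and offset carried between hunks,
# the rebuilt header would read: @@ -old_start,2 +new_start,3 @@
print(old_count, new_count)
```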
@@ -122,10 +161,10 @@ def regenerate_index(old_path, new_path, cur_dir):
122
161
  "as this would require manually applying the patch first."
123
162
  )
124
163
 
125
- return f"index {old_sha}..{new_sha}{mode}"
164
+ return f"index {old_sha}..{new_sha}{mode}\n"
126
165
 
127
166
 
128
- def fix_patch(patch_lines, original):
167
+ def fix_patch(patch_lines, original, remove_binary=False):
129
168
  dir_mode = os.path.isdir(original)
130
169
  original_path = Path(original).absolute()
131
170
 
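
The `fix_patch` signature gains a `remove_binary` keyword defaulting to `False`. A minimal usage sketch of the updated entry point (file paths are placeholders; per the later hunk, passing `remove_binary=True` currently raises `NotImplementedError` when a binary file is encountered):

```python
from patch_fixer import fix_patch

# placeholder paths: a broken patch plus the original file or directory it targets
with open("broken.patch", encoding="utf-8") as f:
    patch_lines = f.readlines()

fixed_lines = fix_patch(patch_lines, "path/to/original", remove_binary=False)

with open("fixed.patch", "w", encoding="utf-8") as f:
    f.writelines(fixed_lines)
```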
@@ -146,7 +185,10 @@ def fix_patch(patch_lines, original):
146
185
  look_for_rename = False
147
186
  similarity_index = None
148
187
  missing_index = False
188
+ binary_file = False
149
189
  hunk_context = ""
190
+ original_lines = []
191
+ file_loaded = False
150
192
 
151
193
  for i, line in enumerate(patch_lines):
152
194
  match_groups, line_type = match_line(line)
@@ -168,12 +210,14 @@ def fix_patch(patch_lines, original):
168
210
  current_hunk = []
169
211
  a, b = split_ab(match_groups)
170
212
  if a != b:
171
- raise ValueError(f"Diff paths do not match: \n{a}\n{b}")
213
+ look_for_rename = True
172
214
  fixed_lines.append(normalize_line(line))
173
215
  last_diff = i
174
216
  file_start_header = False
175
217
  file_end_header = False
176
218
  first_hunk = True
219
+ binary_file = False
220
+ file_loaded = False
177
221
  case "MODE_LINE":
178
222
  if last_diff != i - 1:
179
223
  raise NotImplementedError("Missing diff line not yet supported")
@@ -188,14 +232,18 @@ def fix_patch(patch_lines, original):
188
232
  fixed_lines.append(normalize_line(line))
189
233
  missing_index = False
190
234
  case "BINARY_LINE":
191
- raise NotImplementedError("Binary files not supported yet")
235
+ if remove_binary:
236
+ raise NotImplementedError("Ignoring binary files not yet supported")
237
+ binary_file = True
238
+ fixed_lines.append(normalize_line(line))
192
239
  case "RENAME_FROM":
193
240
  if not look_for_rename:
194
241
  pass # TODO: handle missing index line
242
+ if binary_file:
243
+ raise NotImplementedError("Renaming binary files not yet supported")
195
244
  if last_index != i - 1:
196
245
  missing_index = True # need this for existence check in RENAME_TO block
197
- similarity_index = 100 # TODO: is this a dangerous assumption?
198
- fixed_index = "similarity index 100%"
246
+ fixed_index = "similarity index 100%\n"
199
247
  fixed_lines.append(normalize_line(fixed_index))
200
248
  last_index = i - 1
201
249
  look_for_rename = False
@@ -204,16 +252,18 @@ def fix_patch(patch_lines, original):
204
252
  offset = 0
205
253
  last_hunk = 0
206
254
  if not Path.exists(current_path):
207
- if similarity_index == 100:
208
- fixed_lines.append(normalize_line(line))
209
- look_for_rename = True
210
- continue
211
- raise NotImplementedError("Parsing files that were both renamed and modified is not yet supported.")
255
+ # TODO: verify whether this block is necessary at all
256
+ fixed_lines.append(normalize_line(line))
257
+ look_for_rename = True
258
+ file_loaded = False
259
+ continue
260
+ if not current_path.is_file():
261
+ raise IsADirectoryError(f"Rename from header points to a directory, not a file: {current_file}")
212
262
  if dir_mode or current_path == original_path:
213
263
  with open(current_path, encoding='utf-8') as f:
214
264
  original_lines = [l.rstrip('\n') for l in f.readlines()]
215
265
  fixed_lines.append(normalize_line(line))
216
- # TODO: analogous boolean to `file_start_header`?
266
+ file_loaded = True
217
267
  else:
218
268
  raise FileNotFoundError(f"Filename {current_file} in `rename from` header does not match argument {original}")
219
269
  case "RENAME_TO":
@@ -223,18 +273,20 @@ def fix_patch(patch_lines, original):
223
273
  last_index = i - 2
224
274
  else:
225
275
  raise NotImplementedError("Missing `rename from` header not yet supported.")
226
- if look_for_rename:
227
- # the old file doesn't exist, so we need to read this one
228
- current_file = match_groups[0]
229
- current_path = Path(current_file).absolute()
230
- with open(current_path, encoding='utf-8') as f:
231
- original_lines = [l.rstrip('\n') for l in f.readlines()]
232
- fixed_lines.append(normalize_line(line))
233
- look_for_rename = False
234
- pass
276
+ # TODO: do something sensible if `look_for_rename` is false
277
+ current_file = match_groups[0]
278
+ current_path = Path(current_file).absolute()
279
+ if current_file and current_path.is_dir():
280
+ raise IsADirectoryError(f"rename to points to a directory, not a file: {current_file}")
281
+ fixed_lines.append(normalize_line(line))
282
+ look_for_rename = False
235
283
  case "FILE_HEADER_START":
236
284
  if look_for_rename:
237
285
  raise NotImplementedError("Replacing file header with rename not yet supported.")
286
+ if binary_file:
287
+ raise NotImplementedError("A header block with both 'binary files differ' and "
288
+ "file start/end headers is a confusing state"
289
+ "\nfrom which there is no obvious way to recover.")
238
290
  if last_index != i - 1:
239
291
  missing_index = True
240
292
  last_index = i - 1
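
For context, the pure-rename header block that the `RENAME_FROM`/`RENAME_TO` cases repair has this shape in a well-formed patch (filenames are placeholders):

```python
# shape of a well-formed pure-rename header; note the rename from/to paths
# carry no a/ or b/ prefix, unlike the diff --git line
rename_header = (
    "diff --git a/old_name.txt b/new_name.txt\n"
    "similarity index 100%\n"
    "rename from old_name.txt\n"
    "rename to new_name.txt\n"
)
print(rename_header, end="")
```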
@@ -242,13 +294,15 @@ def fix_patch(patch_lines, original):
242
294
  if current_file and not dir_mode:
243
295
  raise ValueError("Diff references multiple files but only one provided.")
244
296
  current_file = match_groups[0]
245
- offset = 0
246
- last_hunk = 0
297
+ if not file_loaded:
298
+ offset = 0
299
+ last_hunk = 0
247
300
  if current_file == "/dev/null":
248
301
  if last_diff > last_mode:
249
302
  raise NotImplementedError("Missing mode line not yet supported")
250
303
  fixed_lines.append(normalize_line(line))
251
304
  file_start_header = True
305
+ file_loaded = False
252
306
  continue
253
307
  if current_file.startswith("a/"):
254
308
  current_file = current_file[2:]
@@ -257,16 +311,24 @@ def fix_patch(patch_lines, original):
257
311
  current_path = Path(current_file).absolute()
258
312
  if not current_path.exists():
259
313
  raise FileNotFoundError(f"File header start points to non-existent file: {current_file}")
260
- if dir_mode or Path(current_file) == Path(original):
261
- with open(current_file, encoding='utf-8') as f:
262
- original_lines = [l.rstrip('\n') for l in f.readlines()]
263
- fixed_lines.append(normalize_line(line))
264
- file_start_header = True
265
- else:
266
- raise FileNotFoundError(f"Filename {current_file} in header does not match argument {original}")
314
+ if not current_path.is_file():
315
+ raise IsADirectoryError(f"File header start points to a directory, not a file: {current_file}")
316
+ if not file_loaded:
317
+ if dir_mode or Path(current_file) == Path(original):
318
+ with open(current_file, encoding='utf-8') as f:
319
+ original_lines = [l.rstrip('\n') for l in f.readlines()]
320
+ file_loaded = True
321
+ else:
322
+ raise FileNotFoundError(f"Filename {current_file} in header does not match argument {original}")
323
+ fixed_lines.append(normalize_line(line))
324
+ file_start_header = True
267
325
  case "FILE_HEADER_END":
268
326
  if look_for_rename:
269
327
  raise NotImplementedError("Replacing file header with rename not yet supported.")
328
+ if binary_file:
329
+ raise NotImplementedError("A header block with both 'binary files differ' and "
330
+ "file start/end headers is a confusing state"
331
+ "\nfrom which there is no obvious way to recover.")
270
332
  dest_file = match_groups[0]
271
333
  dest_path = Path(dest_file).absolute()
272
334
  if dest_file.startswith("b/"):
@@ -286,37 +348,47 @@ def fix_patch(patch_lines, original):
286
348
  else:
287
349
  # reconstruct file start header based on end header
288
350
  a = match_groups[0].replace("b", "a")
289
- fixed_lines.append(normalize_line(f"--- {a}"))
351
+ fixed_lines.append(normalize_line(f"--- {a}\n"))
290
352
  file_start_header = True
291
353
  elif current_file == "/dev/null":
292
354
  if dest_file == "/dev/null":
293
355
  raise ValueError("File headers cannot both be /dev/null")
294
- elif not dest_path.exists():
295
- raise FileNotFoundError(f"File header end points to non-existent file: {dest_file}")
356
+ elif dest_path.exists():
357
+ raise FileExistsError(f"File header start /dev/null implies file creation, "
358
+ f"but file header end would overwrite existing file: {dest_file}")
296
359
  current_file = dest_file
297
360
  current_path = Path(current_file).absolute()
298
361
  if dir_mode or current_path == original_path:
299
- # TODO: in dir mode, verify that current file exists in original path
300
- with open(current_path, encoding='utf-8') as f:
301
- original_lines = [l.rstrip('\n') for l in f.readlines()]
362
+ original_lines = []
302
363
  fixed_lines.append(normalize_line(line))
303
364
  file_end_header = True
304
365
  else:
305
366
  raise FileNotFoundError(f"Filename {current_file} in header does not match argument {original}")
306
367
  elif dest_file == "/dev/null":
307
- # TODO: check if other modes are possible
308
- if last_mode < last_diff:
309
- last_mode = last_diff + 1
310
- fixed_lines.insert(last_mode, "deleted file mode 100644")
311
- last_index += 1 # index comes after mode
368
+ current_path = Path(current_file).absolute()
369
+ if not current_path.exists():
370
+ raise FileNotFoundError(f"The file being 'deleted' does not exist: {current_file}")
371
+ if last_mode <= last_diff:
372
+ fixed_lines.insert(last_diff + 1, "deleted file mode 100644\n")
373
+ last_index += 1
312
374
  elif "deleted" not in fixed_lines[last_mode]:
313
- fixed_lines[last_mode] = "deleted file mode 100644"
314
- else:
315
- fixed_lines.append("deleted file mode 100644")
375
+ fixed_lines[last_mode] = "deleted file mode 100644\n"
376
+ fixed_lines.append(normalize_line(line))
377
+ file_end_header = True
316
378
  elif current_file != dest_file:
317
- raise ValueError(f"File headers do not match: \n{current_file}\n{dest_file}")
318
- pass
379
+ # this is a rename, original_lines is already set from FILE_HEADER_START
380
+ fixed_lines.append(normalize_line(line))
381
+ file_end_header = True
382
+ first_hunk = True
383
+ else:
384
+ fixed_lines.append(normalize_line(line))
385
+ file_end_header = True
319
386
  case "HUNK_HEADER":
387
+ if binary_file:
388
+ raise ValueError("Binary file can't have a hunk header.")
389
+ if look_for_rename:
390
+ raise ValueError(f"Rename header expected but not found.\n"
391
+ f"Hint: look at lines {last_diff}-{i} of the input patch.")
320
392
  # fix missing file headers before capturing the hunk
321
393
  if not file_end_header:
322
394
  diff_line = patch_lines[last_diff]
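
For context, the deletion header that the `/dev/null` branch above rebuilds has this shape in a well-formed patch (filename and blob hashes are placeholders):

```python
# shape of a well-formed deletion header block; the new blob hash is all zeros
deletion_header = (
    "diff --git a/removed.txt b/removed.txt\n"
    "deleted file mode 100644\n"
    "index 1234567..0000000\n"
    "--- a/removed.txt\n"
    "+++ /dev/null\n"
)
print(deletion_header, end="")
```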
@@ -370,7 +442,7 @@ def fix_patch(patch_lines, original):
370
442
  fixed_lines.extend(current_hunk)
371
443
 
372
444
  # if original file didn't end with a newline, strip out the newline here
373
- if not original_lines[-1].endswith("\n"):
445
+ if original_lines and not original_lines[-1].endswith("\n"):
374
446
  fixed_lines[-1] = fixed_lines[-1].rstrip("\n")
375
447
 
376
448
  return fixed_lines
@@ -395,6 +467,7 @@ def main():
395
467
 
396
468
  print(f"Fixed patch written to {output_file}")
397
469
 
470
+
398
471
  if __name__ == "__main__":
399
472
  main()
400
473
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: patch-fixer
3
- Version: 0.2.3
3
+ Version: 0.3.1
4
4
  Summary: Fixes erroneous git apply patches to the best of its ability.
5
5
  Maintainer-email: Alex Mueller <amueller474@gmail.com>
6
6
  License-Expression: Apache-2.0
@@ -22,6 +22,7 @@ Description-Content-Type: text/markdown
22
22
  License-File: LICENSE
23
23
  Requires-Dist: GitPython
24
24
  Provides-Extra: test
25
+ Requires-Dist: hypothesis; extra == "test"
25
26
  Requires-Dist: pytest; extra == "test"
26
27
  Requires-Dist: requests; extra == "test"
27
28
  Dynamic: license-file
@@ -69,8 +70,9 @@ cd patch-fixer
69
70
  pip install -e .[test]
70
71
  pytest
71
72
  ```
72
- Note that some test failures are expected as this project is in the early alpha stage.
73
- Please only report test failures if the same test passed in a previous version.
73
+ From version `0.3.0` onward (at least until version `1.0`), some test failures are expected
74
+ in bugfix versions as I like to use test-driven development to build out new features.
75
+ Please only report test failures if the same test existed and passed in the most recent `0.x.0` version.
74
76
 
75
77
  ## License
76
78
 
@@ -8,4 +8,5 @@ patch_fixer.egg-info/SOURCES.txt
8
8
  patch_fixer.egg-info/dependency_links.txt
9
9
  patch_fixer.egg-info/requires.txt
10
10
  patch_fixer.egg-info/top_level.txt
11
+ tests/test_norm.py
11
12
  tests/test_repos.py
@@ -1,5 +1,6 @@
1
1
  GitPython
2
2
 
3
3
  [test]
4
+ hypothesis
4
5
  pytest
5
6
  requests
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "patch-fixer"
7
- version = "0.2.3"
7
+ version = "0.3.1"
8
8
  description = "Fixes erroneous git apply patches to the best of its ability."
9
9
  maintainers = [
10
10
  {name = "Alex Mueller", email="amueller474@gmail.com"},
@@ -34,6 +34,7 @@ license-files = [
34
34
 
35
35
  [project.optional-dependencies]
36
36
  test = [
37
+ "hypothesis",
37
38
  "pytest",
38
39
  "requests"
39
40
  ]
@@ -0,0 +1,87 @@
1
+ import pytest
2
+ from hypothesis import given, strategies as st
3
+
4
+ from patch_fixer.patch_fixer import normalize_line, BadCarriageReturn
5
+
6
+ # --- Good cases --------------------------------------------------
7
+
8
+ @pytest.mark.parametrize("line, expected", [
9
+ ("", "\n"), # empty string -> newline
10
+ ("foo", "foo\n"), # no terminator
11
+ ("foo\r", "foo\n"), # CR terminator normalized
12
+ ("foo\n", "foo\n"), # LF terminator unchanged
13
+ ("foo\r\n", "foo\n"), # CRLF normalized
14
+ ])
15
+ def test_normalize_good(line, expected):
16
+ assert normalize_line(line) == expected
17
+
18
+
19
+ # --- Type errors -------------------------------------------------
20
+
21
+ @pytest.mark.parametrize("bad", [
22
+ 123,
23
+ 4.56,
24
+ None,
25
+ True,
26
+ ["list"],
27
+ {"set"},
28
+ {"dict": "val"},
29
+ ("tuple",),
30
+ ])
31
+ def test_normalize_type_error(bad):
32
+ with pytest.raises(TypeError):
33
+ normalize_line(bad)
34
+
35
+
36
+ # --- Bad endings -------------------------------------------------
37
+
38
+ @pytest.mark.parametrize("line", [
39
+ "foo\n\r", # LF then CR
40
+ "foo\rx", # CR not followed by LF at end
41
+ ])
42
+ def test_normalize_bad_endings(line):
43
+ with pytest.raises(BadCarriageReturn):
44
+ normalize_line(line)
45
+
46
+
47
+ # --- Interior CR/LF ----------------------------------------------
48
+
49
+ def test_interior_lf_raises():
50
+ line = "bad\nline\n"
51
+ with pytest.raises(ValueError):
52
+ normalize_line(line)
53
+
54
+ def test_interior_cr_raises():
55
+ line = "bad\rcarriage\n"
56
+ with pytest.raises(BadCarriageReturn):
57
+ normalize_line(line)
58
+
59
+ # --- Hypothesis testing ------------------------------------------
60
+
61
+ # generate arbitrary strings including \r and \n
62
+ line_strategy = st.text(alphabet=st.characters(), min_size=0, max_size=100)
63
+
64
+ @given(line=line_strategy)
65
+ def test_normalize_line_hypothesis(line):
66
+ # we want to see that normalize_line either:
67
+ # 1. returns a string ending with exactly one "\n", or
68
+ # 2. raises ValueError for interior LF, or
69
+ # 3. raises BadCarriageReturn for interior CR or malformed endings
70
+ try:
71
+ result = normalize_line(line)
72
+ except BadCarriageReturn:
73
+ # must have an interior CR somewhere, or malformed ending
74
+ cr_condition = (("\r" in line[:-2])
75
+ or (line.endswith("\r") and not line.endswith("\r\n"))
76
+ or line.endswith("\n\r"))
77
+ assert cr_condition, f"BadCarriageReturn raised unexpectedly for line: {line!r}"
78
+ except ValueError:
79
+ # must have an interior LF somewhere
80
+ assert "\n" in line[:-1], f"ValueError raised unexpectedly for line: {line!r}"
81
+ else:
82
+ # function returned normally
83
+ assert result.endswith("\n"), f"Returned line does not end with \\n: {result!r}"
84
+
85
+ core = result[:-1]
86
+ assert "\n" not in core
87
+ assert "\r" not in core
@@ -27,15 +27,25 @@ import pytest
27
27
  from patch_fixer import fix_patch
28
28
 
29
29
  REPOS = {
30
+ ("apache", "airflow"): ("26f6e54","2136f56"), # big repo
30
31
  ("asottile", "astpretty"): ("5b68c7e", "5a8296f"),
32
+ ("astral-sh", "ruff"): ("7fee877", "11dae2c"),
33
+ ("gabrielecirulli", "2048"): ("878098f", "478b6ec"), # adds binary files
34
+ ("mrdoob", "three.js"): ("5f3a718", "b97f111"), # replaces images
35
+ ("myriadrf", "LimeSDR-Mini"): ("0bb75e7", "fb012c8"), # gigantic diffs
31
36
  ("numpy", "numpy"): ("dca33b3", "5f82966"),
32
37
  ("pallets", "click"): ("93c6966", "e11a1ef"),
38
+ ("psf", "black"): ("8d9d18c", "903bef5"), # whole year's worth of changes
39
+ ("PyCQA", "flake8"): ("8bdec0b", "d45bdc0"), # two years of changes
33
40
  ("scipy", "scipy"): ("c2220c0", "4ca6dd9"),
41
+ ("tox-dev", "tox"): ("fb3fe66", "01442da"), # four years
34
42
  ("yaml", "pyyaml"): ("48838a3", "a2d19c0"),
43
+ ("zertovitch", "hac"): ("c563d18", "17207ee") # renamed binary files
35
44
  }
36
45
 
37
46
  CACHE_DIR = Path.home() / ".patch-testing"
38
47
 
48
+
39
49
  class DeletedBranchError(ValueError):
40
50
  def __init__(self, commit_hash):
41
51
  self.commit_hash = commit_hash
@@ -47,7 +57,7 @@ def verify_commit_exists(repo: Repo, commit_hash: str) -> None:
47
57
  try:
48
58
  repo.commit(commit_hash)
49
59
  except ValueError:
50
- # Commit belongs to a deleted branch (let caller handle it)
60
+ # commit belongs to a deleted branch (let caller handle it)
51
61
  raise DeletedBranchError(commit_hash)
52
62
 
53
63
 
@@ -64,13 +74,13 @@ def download_commit_zip(repo_url, commit_hash: str, dest_path: Path) -> None:
64
74
  print(f"Failed to download commit snapshot: {e}")
65
75
  sys.exit(1)
66
76
 
67
- # Extract the zip into dest_path
77
+ # extract the zip into dest_path
68
78
  with zipfile.ZipFile(io.BytesIO(r.content)) as z:
69
79
  # GitHub wraps contents in a top-level folder named like repo-<hash>
70
80
  top_level = z.namelist()[0].split("/")[0]
71
81
  z.extractall(dest_path.parent)
72
82
 
73
- # Move extracted folder to dest_path
83
+ # move extracted folder to dest_path
74
84
  extracted_path = dest_path.parent / top_level
75
85
  if dest_path.exists():
76
86
  shutil.rmtree(dest_path)
@@ -111,7 +121,7 @@ def clone_repos(repo_group, repo_name, old_commit, new_commit):
111
121
  # no sense keeping around an object that points to HEAD
112
122
  repo_new = Repo(repo_new_path)
113
123
 
114
- # Prevent downloading the repo twice if we can help it
124
+ # prevent downloading the repo twice if we can help it
115
125
  shutil.copytree(repo_new_path, repo_old_path)
116
126
  repo_old = Repo(repo_old_path)
117
127
  try:
@@ -137,7 +147,7 @@ def test_integration_equality(repo_group, repo_name, old_commit, new_commit):
137
147
  ) = clone_repos(repo_group, repo_name, old_commit, new_commit)
138
148
 
139
149
  expected = repo_new.git.diff(old_commit, new_commit)
140
- input_lines = expected.splitlines()
150
+ input_lines = expected.splitlines(keepends=True)
141
151
  fixed_lines = fix_patch(input_lines, repo_old_path)
142
152
  actual = "".join(fixed_lines)
143
153
 
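
The switch to `splitlines(keepends=True)` matters because the fixer inspects and normalizes line terminators; a small illustration of the difference:

```python
text = "line one\nline two\n"

print(text.splitlines())               # ['line one', 'line two']       (terminators dropped)
print(text.splitlines(keepends=True))  # ['line one\n', 'line two\n']   (terminators preserved)
```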
File without changes
File without changes