patch-fixer 0.3.3__tar.gz → 0.3.4__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: patch-fixer
3
- Version: 0.3.3
3
+ Version: 0.3.4
4
4
  Summary: Fixes erroneous git apply patches to the best of its ability.
5
5
  Maintainer-email: Alex Mueller <amueller474@gmail.com>
6
6
  License-Expression: Apache-2.0
@@ -107,6 +107,13 @@ with open("excluded.patch", 'w', encoding='utf-8') as f:
107
107
  f.writelines(excluded)
108
108
  ```
109
109
 
110
+ ## Known Limitations
111
+
112
+ - When fixing patches with missing `index` lines, the tool requires the files to be in a git repository to regenerate the index. This is only needed for file deletions and renames.
113
+ - `patch-fixer` assumes the patch follows git's unified diff format.
114
+ - Current implementation is not very robust to corrupted hunk content.
115
+ - Much more comprehensive fuzzy string matching is planned.
116
+
110
117
  ## Local Testing
111
118
  ```bash
112
119
  git clone https://github.com/ajcm474/patch-fixer.git
@@ -78,6 +78,13 @@ with open("excluded.patch", 'w', encoding='utf-8') as f:
78
78
  f.writelines(excluded)
79
79
  ```
80
80
 
81
+ ## Known Limitations
82
+
83
+ - When fixing patches with missing `index` lines, the tool requires the files to be in a git repository to regenerate the index. This is only needed for file deletions and renames.
84
+ - `patch-fixer` assumes the patch follows git's unified diff format.
85
+ - Current implementation is not very robust to corrupted hunk content.
86
+ - Much more comprehensive fuzzy string matching is planned.
87
+
81
88
  ## Local Testing
82
89
  ```bash
83
90
  git clone https://github.com/ajcm474/patch-fixer.git
@@ -14,7 +14,12 @@ def fix_command(args):
14
14
  with open(args.broken_patch, encoding='utf-8') as f:
15
15
  patch_lines = f.readlines()
16
16
 
17
- fixed_lines = fix_patch(patch_lines, args.original)
17
+ fixed_lines = fix_patch(
18
+ patch_lines,
19
+ args.original,
20
+ fuzzy=args.fuzzy,
21
+ add_newline=args.add_newline
22
+ )
18
23
 
19
24
  with open(args.output, 'w', encoding='utf-8') as f:
20
25
  f.writelines(fixed_lines)
@@ -77,6 +82,16 @@ def main():
77
82
  'output',
78
83
  help='Path where the fixed patch will be written'
79
84
  )
85
+ fix_parser.add_argument(
86
+ '--fuzzy',
87
+ action='store_true',
88
+ help='Enable fuzzy string matching when finding hunks in original files'
89
+ )
90
+ fix_parser.add_argument(
91
+ '--add-newline',
92
+ action='store_true',
93
+ help='Add final newline when processing "No newline at end of file" markers'
94
+ )
80
95
 
81
96
  # split command
82
97
  split_parser = subparsers.add_parser(
@@ -2,6 +2,7 @@
2
2
  import os
3
3
  import re
4
4
  import sys
5
+ import warnings
5
6
  from pathlib import Path
6
7
 
7
8
  from git import Repo
@@ -61,7 +62,29 @@ def normalize_line(line):
61
62
  return core + "\n"
62
63
 
63
64
 
64
- def find_hunk_start(context_lines, original_lines):
65
+ def fuzzy_line_similarity(line1, line2, threshold=0.8):
66
+ """Calculate similarity between two lines using a simple ratio."""
67
+ if not line1 or not line2:
68
+ return 0.0
69
+
70
+ l1, l2 = line1.strip(), line2.strip()
71
+
72
+ if l1 == l2:
73
+ return 1.0
74
+
75
+ if len(l1) == 0 or len(l2) == 0:
76
+ return 0.0
77
+
78
+ # count common characters
79
+ common = 0
80
+ for char in set(l1) & set(l2):
81
+ common += min(l1.count(char), l2.count(char))
82
+
83
+ total_chars = len(l1) + len(l2)
84
+ return (2.0 * common) / total_chars if total_chars > 0 else 0.0
85
+
86
+
87
+ def find_hunk_start(context_lines, original_lines, fuzzy=False):
65
88
  """Search original_lines for context_lines and return start line index (0-based)."""
66
89
  ctx = []
67
90
  for line in context_lines:
@@ -74,11 +97,33 @@ def find_hunk_start(context_lines, original_lines):
74
97
  ctx.append(line)
75
98
  if not ctx:
76
99
  raise ValueError("Cannot search for empty hunk.")
100
+
101
+ # first try exact matching
77
102
  for i in range(len(original_lines) - len(ctx) + 1):
78
103
  # this part will fail if the diff is malformed beyond hunk header
79
- equal_lines = [original_lines[i+j].strip() == ctx[j].strip() for j in range(len(ctx))]
104
+ equal_lines = [original_lines[i + j].strip() == ctx[j].strip() for j in range(len(ctx))]
80
105
  if all(equal_lines):
81
106
  return i
107
+
108
+ # if fuzzy matching is enabled and exact match failed, try fuzzy match
109
+ if fuzzy:
110
+ best_match_score = 0.0
111
+ best_match_pos = 0
112
+
113
+ for i in range(len(original_lines) - len(ctx) + 1):
114
+ total_similarity = 0.0
115
+ for j in range(len(ctx)):
116
+ similarity = fuzzy_line_similarity(original_lines[i + j], ctx[j])
117
+ total_similarity += similarity
118
+
119
+ avg_similarity = total_similarity / len(ctx)
120
+ if avg_similarity > best_match_score and avg_similarity > 0.6:
121
+ best_match_score = avg_similarity
122
+ best_match_pos = i
123
+
124
+ if best_match_score > 0.6:
125
+ return best_match_pos
126
+
82
127
  return 0
83
128
 
84
129
 
@@ -111,14 +156,14 @@ def reconstruct_file_header(diff_line, header_type):
111
156
  raise ValueError(f"Unsupported header type: {header_type}")
112
157
 
113
158
 
114
- def capture_hunk(current_hunk, original_lines, offset, last_hunk, hunk_context):
159
+ def capture_hunk(current_hunk, original_lines, offset, last_hunk, hunk_context, fuzzy=False):
115
160
  # compute line counts
116
161
  old_count = sum(1 for l in current_hunk if l.startswith((' ', '-')))
117
162
  new_count = sum(1 for l in current_hunk if l.startswith((' ', '+')))
118
163
 
119
164
  if old_count > 0:
120
165
  # compute starting line in original file
121
- old_start = find_hunk_start(current_hunk, original_lines) + 1
166
+ old_start = find_hunk_start(current_hunk, original_lines, fuzzy=fuzzy) + 1
122
167
 
123
168
  # if the line number descends, we either have a bad match or a new file
124
169
  if old_start < last_hunk:
@@ -147,7 +192,11 @@ def capture_hunk(current_hunk, original_lines, offset, last_hunk, hunk_context):
147
192
 
148
193
  def regenerate_index(old_path, new_path, cur_dir):
149
194
  repo = Repo(cur_dir)
150
- mode = " 100644" # TODO: check if mode can be a different number
195
+
196
+ # Common git file modes: 100644 (regular file), 100755 (executable file),
197
+ # 120000 (symbolic link), 160000 (submodule), 040000 (tree/directory)
198
+ # TODO: guess mode based on above information
199
+ mode = " 100644"
151
200
 
152
201
  # file deletion
153
202
  if new_path == "/dev/null":
@@ -164,12 +213,15 @@ def regenerate_index(old_path, new_path, cur_dir):
164
213
  return f"index {old_sha}..{new_sha}{mode}\n"
165
214
 
166
215
 
167
- def fix_patch(patch_lines, original, remove_binary=False):
216
+ def fix_patch(patch_lines, original, remove_binary=False, fuzzy=False, add_newline=False):
168
217
  dir_mode = os.path.isdir(original)
169
218
  original_path = Path(original).absolute()
170
219
 
171
220
  # make relative paths in the diff work
172
- os.chdir(original_path)
221
+ if dir_mode:
222
+ os.chdir(original_path)
223
+ else:
224
+ os.chdir(original_path.parent)
173
225
 
174
226
  fixed_lines = []
175
227
  current_hunk = []
@@ -201,7 +253,7 @@ def fix_patch(patch_lines, original, remove_binary=False):
201
253
  fixed_header,
202
254
  offset,
203
255
  last_hunk
204
- ) = capture_hunk(current_hunk, original_lines, offset, last_hunk, hunk_context)
256
+ ) = capture_hunk(current_hunk, original_lines, offset, last_hunk, hunk_context, fuzzy=fuzzy)
205
257
  except MissingHunkError:
206
258
  raise NotImplementedError(f"Could not find hunk in {current_file}:"
207
259
  f"\n\n{''.join(current_hunk)}")
@@ -224,7 +276,12 @@ def fix_patch(patch_lines, original, remove_binary=False):
224
276
  last_mode = i
225
277
  fixed_lines.append(normalize_line(line))
226
278
  case "INDEX_LINE":
227
- # TODO: verify that mode is present for anything but deletion
279
+ # mode should be present in index line for all operations except file deletion
280
+ # for deletions, the mode is omitted since the file no longer exists
281
+ index_line = normalize_line(line).strip()
282
+ if not index_line.endswith("..0000000") and not re.search(r' [0-7]{6}$', index_line):
283
+ # TODO: this is the right idea, but a poor implementation
284
+ pass
228
285
  last_index = i
229
286
  similarity_index = match_groups[0]
230
287
  if similarity_index:
@@ -238,7 +295,9 @@ def fix_patch(patch_lines, original, remove_binary=False):
238
295
  fixed_lines.append(normalize_line(line))
239
296
  case "RENAME_FROM":
240
297
  if not look_for_rename:
241
- pass # TODO: handle missing index line
298
+ # handle case where rename from appears without corresponding index line
299
+ # this may indicate a malformed patch, but we can try to continue
300
+ warnings.warn(f"Warning: 'rename from' found without expected index line at line {i+1}")
242
301
  if binary_file:
243
302
  raise NotImplementedError("Renaming binary files not yet supported")
244
303
  if last_index != i - 1:
@@ -252,7 +311,10 @@ def fix_patch(patch_lines, original, remove_binary=False):
252
311
  offset = 0
253
312
  last_hunk = 0
254
313
  if not Path.exists(current_path):
255
- # TODO: verify whether this block is necessary at all
314
+ # this is meant to handle cases where the source file
315
+ # doesn't exist (e.g., when applying a patch that renames
316
+ # a file created earlier in the same patch)
317
+ # TODO: but really, does that ever happen???
256
318
  fixed_lines.append(normalize_line(line))
257
319
  look_for_rename = True
258
320
  file_loaded = False
@@ -273,7 +335,12 @@ def fix_patch(patch_lines, original, remove_binary=False):
273
335
  last_index = i - 2
274
336
  else:
275
337
  raise NotImplementedError("Missing `rename from` header not yet supported.")
276
- # TODO: do something sensible if `look_for_rename` is false
338
+ if not look_for_rename:
339
+ # if we're not looking for a rename but encounter "rename to",
340
+ # this indicates a malformed patch - log warning but continue
341
+ warnings.warn(
342
+ f"Warning: unexpected 'rename to' found at line {i + 1} without corresponding 'rename from'"
343
+ )
277
344
  current_file = match_groups[0]
278
345
  current_path = Path(current_file).absolute()
279
346
  if current_file and current_path.is_dir():
@@ -412,7 +479,7 @@ def fix_patch(patch_lines, original, remove_binary=False):
412
479
  fixed_header,
413
480
  offset,
414
481
  last_hunk
415
- ) = capture_hunk(current_hunk, original_lines, offset, last_hunk, hunk_context)
482
+ ) = capture_hunk(current_hunk, original_lines, offset, last_hunk, hunk_context, fuzzy=fuzzy)
416
483
  except MissingHunkError:
417
484
  raise NotImplementedError(f"Could not find hunk in {current_file}:"
418
485
  f"\n\n{''.join(current_hunk)}")
@@ -421,10 +488,13 @@ def fix_patch(patch_lines, original, remove_binary=False):
421
488
  current_hunk = []
422
489
  hunk_context = match_groups[4]
423
490
  case "END_LINE":
424
- # TODO: add newline at end of file if user requests
425
- fixed_lines.append(normalize_line(line))
491
+ # if user requested, add a newline at end of file when this marker is present
492
+ if add_newline:
493
+ fixed_lines.append("\n")
494
+ else:
495
+ fixed_lines.append(normalize_line(line))
426
496
  case _:
427
- # TODO: fuzzy string matching
497
+ # TODO: fix fuzzy string matching to be less granular
428
498
  # this is a normal line, add to current hunk
429
499
  current_hunk.append(normalize_line(line))
430
500
 
@@ -434,7 +504,7 @@ def fix_patch(patch_lines, original, remove_binary=False):
434
504
  fixed_header,
435
505
  offset,
436
506
  last_hunk
437
- ) = capture_hunk(current_hunk, original_lines, offset, last_hunk, hunk_context)
507
+ ) = capture_hunk(current_hunk, original_lines, offset, last_hunk, hunk_context, fuzzy=fuzzy)
438
508
  except MissingHunkError:
439
509
  raise NotImplementedError(f"Could not find hunk in {current_file}:"
440
510
  f"\n\n{''.join(current_hunk)}")
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: patch-fixer
3
- Version: 0.3.3
3
+ Version: 0.3.4
4
4
  Summary: Fixes erroneous git apply patches to the best of its ability.
5
5
  Maintainer-email: Alex Mueller <amueller474@gmail.com>
6
6
  License-Expression: Apache-2.0
@@ -107,6 +107,13 @@ with open("excluded.patch", 'w', encoding='utf-8') as f:
107
107
  f.writelines(excluded)
108
108
  ```
109
109
 
110
+ ## Known Limitations
111
+
112
+ - When fixing patches with missing `index` lines, the tool requires the files to be in a git repository to regenerate the index. This is only needed for file deletions and renames.
113
+ - `patch-fixer` assumes the patch follows git's unified diff format.
114
+ - Current implementation is not very robust to corrupted hunk content.
115
+ - Much more comprehensive fuzzy string matching is planned.
116
+
110
117
  ## Local Testing
111
118
  ```bash
112
119
  git clone https://github.com/ajcm474/patch-fixer.git
@@ -11,6 +11,8 @@ patch_fixer.egg-info/dependency_links.txt
11
11
  patch_fixer.egg-info/entry_points.txt
12
12
  patch_fixer.egg-info/requires.txt
13
13
  patch_fixer.egg-info/top_level.txt
14
+ tests/test_cli.py
15
+ tests/test_fuzzy.py
14
16
  tests/test_norm.py
15
17
  tests/test_repos.py
16
18
  tests/test_split.py
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "patch-fixer"
7
- version = "0.3.3"
7
+ version = "0.3.4"
8
8
  description = "Fixes erroneous git apply patches to the best of its ability."
9
9
  maintainers = [
10
10
  {name = "Alex Mueller", email="amueller474@gmail.com"},
@@ -0,0 +1,149 @@
1
+ """Tests for the CLI module."""
2
+
3
+ import os
4
+ import tempfile
5
+ from unittest.mock import patch
6
+
7
+ import pytest
8
+
9
+ from patch_fixer.cli import main
10
+
11
+
12
+ class TestCLI:
13
+ """Test cases for CLI functionality."""
14
+
15
+ def test_no_command(self, capsys):
16
+ """Test that help is shown when no command is provided."""
17
+ with patch('sys.argv', ['patch-fixer']):
18
+ result = main()
19
+ assert result == 1
20
+ captured = capsys.readouterr()
21
+ assert 'usage: patch-fixer' in captured.out
22
+ assert 'Available commands' in captured.out
23
+
24
+ def test_fix_command(self):
25
+ """Test the fix command in directory mode."""
26
+ with tempfile.TemporaryDirectory() as tmpdir:
27
+ # create test files
28
+ original_file = os.path.join(tmpdir, 'original.txt')
29
+ with open(original_file, 'w') as f:
30
+ f.write("line1\nline2\nline3\n")
31
+
32
+ broken_patch = os.path.join(tmpdir, 'broken.patch')
33
+ with open(broken_patch, 'w') as f:
34
+ f.write("""diff --git a/original.txt b/original.txt
35
+ --- a/original.txt
36
+ +++ b/original.txt
37
+ @@ -1,3 +1,3 @@
38
+ line1
39
+ -line2
40
+ +modified line2
41
+ line3
42
+ """)
43
+
44
+ output_patch = os.path.join(tmpdir, 'fixed.patch')
45
+
46
+ # use directory mode to work around bug in file mode
47
+ with patch('sys.argv', ['patch-fixer', 'fix', tmpdir, broken_patch, output_patch]):
48
+ result = main()
49
+
50
+ assert result == 0
51
+ assert os.path.exists(output_patch)
52
+
53
+ with open(output_patch) as f:
54
+ content = f.read()
55
+ assert 'diff --git' in content
56
+ assert 'modified line2' in content
57
+
58
+ def test_split_command_with_files(self):
59
+ """Test the split command with files specified on command line."""
60
+ with tempfile.TemporaryDirectory() as tmpdir:
61
+ input_patch = os.path.join(tmpdir, 'input.patch')
62
+ with open(input_patch, 'w') as f:
63
+ f.write("""diff --git a/file1.txt b/file1.txt
64
+ --- a/file1.txt
65
+ +++ b/file1.txt
66
+ @@ -1,1 +1,1 @@
67
+ -old1
68
+ +new1
69
+ diff --git a/file2.txt b/file2.txt
70
+ --- a/file2.txt
71
+ +++ b/file2.txt
72
+ @@ -1,1 +1,1 @@
73
+ -old2
74
+ +new2
75
+ """)
76
+
77
+ included = os.path.join(tmpdir, 'included.patch')
78
+ excluded = os.path.join(tmpdir, 'excluded.patch')
79
+
80
+ with patch('sys.argv', ['patch-fixer', 'split', input_patch, included, excluded,
81
+ '-f', 'file1.txt']):
82
+ result = main()
83
+
84
+ assert result == 0
85
+ assert os.path.exists(included)
86
+ assert os.path.exists(excluded)
87
+
88
+ with open(included) as f:
89
+ content = f.read()
90
+ assert 'file1.txt' in content
91
+ assert 'new1' in content
92
+ assert 'file2.txt' not in content
93
+
94
+ with open(excluded) as f:
95
+ content = f.read()
96
+ assert 'file2.txt' in content
97
+ assert 'new2' in content
98
+ assert 'file1.txt' not in content
99
+
100
+ def test_split_command_with_include_file(self):
101
+ """Test the split command with include file."""
102
+ with tempfile.TemporaryDirectory() as tmpdir:
103
+ # create include file
104
+ include_list = os.path.join(tmpdir, 'include.txt')
105
+ with open(include_list, 'w') as f:
106
+ f.write("file1.txt\n")
107
+
108
+ input_patch = os.path.join(tmpdir, 'input.patch')
109
+ with open(input_patch, 'w') as f:
110
+ f.write("""diff --git a/file1.txt b/file1.txt
111
+ --- a/file1.txt
112
+ +++ b/file1.txt
113
+ @@ -1,1 +1,1 @@
114
+ -old1
115
+ +new1
116
+ diff --git a/file2.txt b/file2.txt
117
+ --- a/file2.txt
118
+ +++ b/file2.txt
119
+ @@ -1,1 +1,1 @@
120
+ -old2
121
+ +new2
122
+ """)
123
+
124
+ included = os.path.join(tmpdir, 'included.patch')
125
+ excluded = os.path.join(tmpdir, 'excluded.patch')
126
+
127
+ with patch('sys.argv', ['patch-fixer', 'split', input_patch, included, excluded,
128
+ '-i', include_list]):
129
+ result = main()
130
+
131
+ assert result == 0
132
+ assert os.path.exists(included)
133
+ assert os.path.exists(excluded)
134
+
135
+ with open(included) as f:
136
+ content = f.read()
137
+ assert 'file1.txt' in content
138
+
139
+ with open(excluded) as f:
140
+ content = f.read()
141
+ assert 'file2.txt' in content
142
+
143
+ def test_error_handling(self, capsys):
144
+ """Test error handling in CLI."""
145
+ with patch('sys.argv', ['patch-fixer', 'fix', 'nonexistent', 'nonexistent', 'out']):
146
+ result = main()
147
+ assert result == 1
148
+ captured = capsys.readouterr()
149
+ assert 'Error:' in captured.err
@@ -0,0 +1,112 @@
1
+ #!/usr/bin/env python3
2
+
3
+ import pytest
4
+ from patch_fixer.patch_fixer import fuzzy_line_similarity, find_hunk_start
5
+
6
+
7
+ class TestFuzzyMatching:
8
+ """Test fuzzy string matching functionality."""
9
+
10
+ def test_fuzzy_line_similarity_exact_match(self):
11
+ """Test fuzzy similarity with exact matches."""
12
+ assert fuzzy_line_similarity("hello world", "hello world") == 1.0
13
+ assert fuzzy_line_similarity("", "") == 1.0
14
+
15
+ def test_fuzzy_line_similarity_no_match(self):
16
+ """Test fuzzy similarity with no common characters."""
17
+ assert fuzzy_line_similarity("abc", "xyz") == 0.0
18
+ assert fuzzy_line_similarity("", "xyz") == 0.0
19
+ assert fuzzy_line_similarity("abc", "") == 0.0
20
+
21
+ def test_fuzzy_line_similarity_partial_match(self):
22
+ """Test fuzzy similarity with partial matches."""
23
+ # "hello" and "hell" share 4 characters
24
+ similarity = fuzzy_line_similarity("hello", "hell")
25
+ assert 0.7 < similarity < 1.0
26
+
27
+ # common characters but different order
28
+ similarity = fuzzy_line_similarity("abc", "bac")
29
+ assert similarity > 0.5
30
+
31
+ def test_fuzzy_line_similarity_whitespace(self):
32
+ """Test fuzzy similarity handles whitespace correctly."""
33
+ assert fuzzy_line_similarity(" hello ", "hello") == 1.0
34
+ assert fuzzy_line_similarity("\thello\n", "hello") == 1.0
35
+
36
+ def test_find_hunk_start_exact_match(self):
37
+ """Test exact matching in find_hunk_start."""
38
+ original_lines = [
39
+ "line 1\n",
40
+ "line 2\n",
41
+ "line 3\n",
42
+ "line 4\n"
43
+ ]
44
+ context_lines = [
45
+ " line 2\n",
46
+ " line 3\n"
47
+ ]
48
+
49
+ result = find_hunk_start(context_lines, original_lines, fuzzy=False)
50
+ assert result == 1 # should find match at line 1 (0-indexed)
51
+
52
+ def test_find_hunk_start_fuzzy_match(self):
53
+ """Test fuzzy matching in find_hunk_start."""
54
+ original_lines = [
55
+ "line 1\n",
56
+ "line two\n", # slightly different
57
+ "line 3\n",
58
+ "line 4\n"
59
+ ]
60
+ context_lines = [
61
+ " line 2\n", # different from "line two"
62
+ " line 3\n"
63
+ ]
64
+
65
+ # exact match should fail
66
+ result_exact = find_hunk_start(context_lines, original_lines, fuzzy=False)
67
+ assert result_exact == 0 # should return 0 when no exact match
68
+
69
+ # fuzzy match should succeed
70
+ result_fuzzy = find_hunk_start(context_lines, original_lines, fuzzy=True)
71
+ assert result_fuzzy == 1 # should find fuzzy match at line 1
72
+
73
+ def test_find_hunk_start_with_deletions(self):
74
+ """Test hunk finding with deletion context."""
75
+ original_lines = [
76
+ "line 1\n",
77
+ "line 2\n",
78
+ "line 3\n",
79
+ "line 4\n"
80
+ ]
81
+ context_lines = [
82
+ " line 1\n", # context
83
+ "-line 2\n", # deletion - should match original
84
+ " line 3\n" # context
85
+ ]
86
+
87
+ result = find_hunk_start(context_lines, original_lines, fuzzy=False)
88
+ assert result == 0 # should find match at line 0
89
+
90
+ def test_find_hunk_start_empty_context(self):
91
+ """Test that empty context raises ValueError."""
92
+ original_lines = ["line 1\n", "line 2\n"]
93
+
94
+ with pytest.raises(ValueError, match="Cannot search for empty hunk"):
95
+ find_hunk_start([], original_lines)
96
+
97
+ def test_find_hunk_start_fuzzy_threshold(self):
98
+ """Test fuzzy matching threshold behavior."""
99
+ original_lines = [
100
+ "completely different content\n",
101
+ "another different line\n",
102
+ "line 3\n",
103
+ "line 4\n"
104
+ ]
105
+ context_lines = [
106
+ " line 1\n", # very different from original
107
+ " line 2\n" # very different from original
108
+ ]
109
+
110
+ # even with fuzzy matching, very different content should not match
111
+ result = find_hunk_start(context_lines, original_lines, fuzzy=True)
112
+ assert result == 0 # should return 0 when similarity is too low
@@ -44,6 +44,7 @@ REPOS = {
44
44
  }
45
45
 
46
46
  CACHE_DIR = Path.home() / ".patch-testing"
47
+ DIFF_CACHE_DIR = CACHE_DIR / "diffs"
47
48
 
48
49
 
49
50
  class DeletedBranchError(ValueError):
@@ -69,8 +70,7 @@ def download_commit_zip(repo_url, commit_hash: str, dest_path: Path) -> None:
69
70
  try:
70
71
  r = requests.get(url, stream=True)
71
72
  r.raise_for_status()
72
- except Exception as e:
73
- # TODO: don't use bare except
73
+ except (requests.RequestException, requests.HTTPError) as e:
74
74
  print(f"Failed to download commit snapshot: {e}")
75
75
  sys.exit(1)
76
76
 
@@ -102,11 +102,19 @@ def clone_repos(repo_group, repo_name, old_commit, new_commit):
102
102
  if not new_exists:
103
103
  shutil.copytree(repo_old_path, repo_new_path)
104
104
 
105
- # TODO: handle deleted branches here too
106
105
  repo_old = Repo(repo_old_path)
107
106
  repo_new = Repo(repo_new_path)
108
- repo_old.git.reset("--hard", old_commit)
109
- repo_new.git.reset("--hard", new_commit)
107
+ try:
108
+ verify_commit_exists(repo_old, old_commit)
109
+ repo_old.git.reset("--hard", old_commit)
110
+ except DeletedBranchError:
111
+ download_commit_zip(f"https://github.com/{repo_group}/{repo_name}", old_commit, repo_old_path)
112
+
113
+ try:
114
+ verify_commit_exists(repo_new, new_commit)
115
+ repo_new.git.reset("--hard", new_commit)
116
+ except DeletedBranchError:
117
+ download_commit_zip(f"https://github.com/{repo_group}/{repo_name}", new_commit, repo_new_path)
110
118
 
111
119
  # otherwise, clone it and make a copy for each commit
112
120
  else:
@@ -133,20 +141,39 @@ def clone_repos(repo_group, repo_name, old_commit, new_commit):
133
141
  return repo_old, repo_old_path, repo_new, repo_new_path
134
142
 
135
143
 
144
+ def get_cached_diff(repo_group, repo_name, old_commit, new_commit):
145
+ """Get diff from cache or generate and cache it."""
146
+ DIFF_CACHE_DIR.mkdir(parents=True, exist_ok=True)
147
+
148
+ diff_filename = f"{repo_group}_{repo_name}_{old_commit}_{new_commit}.diff"
149
+ diff_path = DIFF_CACHE_DIR / diff_filename
150
+
151
+ if diff_path.exists():
152
+ with open(diff_path, 'r', encoding='utf-8') as f:
153
+ return f.read()
154
+
155
+ # generate diff and cache it
156
+ (repo_old, repo_old_path, repo_new, repo_new_path) = clone_repos(repo_group, repo_name, old_commit, new_commit)
157
+ diff_content = repo_new.git.diff(old_commit, new_commit)
158
+
159
+ with open(diff_path, 'w', encoding='utf-8') as f:
160
+ f.write(diff_content)
161
+
162
+ return diff_content
163
+
164
+
136
165
  @pytest.mark.parametrize(
137
166
  "repo_group, repo_name, old_commit, new_commit",
138
167
  [(*repo, *commits) for repo, commits in REPOS.items()]
139
168
  )
140
169
  def test_integration_equality(repo_group, repo_name, old_commit, new_commit):
141
170
  """ Make sure the patch fixer doesn't corrupt valid diffs. """
142
- (
143
- repo_old,
144
- repo_old_path,
145
- repo_new,
146
- repo_new_path
147
- ) = clone_repos(repo_group, repo_name, old_commit, new_commit)
148
-
149
- expected = repo_new.git.diff(old_commit, new_commit)
171
+ # use cached diff if available, otherwise generate and cache it
172
+ expected = get_cached_diff(repo_group, repo_name, old_commit, new_commit)
173
+
174
+ # we still need the old repo path for the patch fixer
175
+ (repo_old, repo_old_path, _, _) = clone_repos(repo_group, repo_name, old_commit, new_commit)
176
+
150
177
  input_lines = expected.splitlines(keepends=True)
151
178
  fixed_lines = fix_patch(input_lines, repo_old_path)
152
179
  actual = "".join(fixed_lines)
File without changes
File without changes