patch-fixer 0.3.2__tar.gz → 0.3.4__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- patch_fixer-0.3.4/PKG-INFO +130 -0
- patch_fixer-0.3.4/README.md +101 -0
- patch_fixer-0.3.4/patch_fixer/cli.py +144 -0
- {patch_fixer-0.3.2 → patch_fixer-0.3.4}/patch_fixer/patch_fixer.py +87 -17
- patch_fixer-0.3.4/patch_fixer.egg-info/PKG-INFO +130 -0
- {patch_fixer-0.3.2 → patch_fixer-0.3.4}/patch_fixer.egg-info/SOURCES.txt +4 -0
- patch_fixer-0.3.4/patch_fixer.egg-info/entry_points.txt +2 -0
- {patch_fixer-0.3.2 → patch_fixer-0.3.4}/pyproject.toml +4 -1
- patch_fixer-0.3.4/tests/test_cli.py +149 -0
- patch_fixer-0.3.4/tests/test_fuzzy.py +112 -0
- {patch_fixer-0.3.2 → patch_fixer-0.3.4}/tests/test_norm.py +14 -3
- {patch_fixer-0.3.2 → patch_fixer-0.3.4}/tests/test_repos.py +40 -13
- patch_fixer-0.3.2/PKG-INFO +0 -79
- patch_fixer-0.3.2/README.md +0 -50
- patch_fixer-0.3.2/patch_fixer.egg-info/PKG-INFO +0 -79
- {patch_fixer-0.3.2 → patch_fixer-0.3.4}/LICENSE +0 -0
- {patch_fixer-0.3.2 → patch_fixer-0.3.4}/patch_fixer/__init__.py +0 -0
- {patch_fixer-0.3.2 → patch_fixer-0.3.4}/patch_fixer/split.py +0 -0
- {patch_fixer-0.3.2 → patch_fixer-0.3.4}/patch_fixer.egg-info/dependency_links.txt +0 -0
- {patch_fixer-0.3.2 → patch_fixer-0.3.4}/patch_fixer.egg-info/requires.txt +0 -0
- {patch_fixer-0.3.2 → patch_fixer-0.3.4}/patch_fixer.egg-info/top_level.txt +0 -0
- {patch_fixer-0.3.2 → patch_fixer-0.3.4}/setup.cfg +0 -0
- {patch_fixer-0.3.2 → patch_fixer-0.3.4}/tests/test_split.py +0 -0
@@ -0,0 +1,130 @@
|
|
1
|
+
Metadata-Version: 2.4
|
2
|
+
Name: patch-fixer
|
3
|
+
Version: 0.3.4
|
4
|
+
Summary: Fixes erroneous git apply patches to the best of its ability.
|
5
|
+
Maintainer-email: Alex Mueller <amueller474@gmail.com>
|
6
|
+
License-Expression: Apache-2.0
|
7
|
+
Project-URL: Homepage, https://github.com/ajcm474/patch-fixer
|
8
|
+
Project-URL: Issues, https://github.com/ajcm474/patch-fixer/issues
|
9
|
+
Classifier: Development Status :: 3 - Alpha
|
10
|
+
Classifier: Intended Audience :: Developers
|
11
|
+
Classifier: Programming Language :: Python
|
12
|
+
Classifier: Programming Language :: Python :: 3
|
13
|
+
Classifier: Programming Language :: Python :: 3.10
|
14
|
+
Classifier: Programming Language :: Python :: 3.11
|
15
|
+
Classifier: Programming Language :: Python :: 3.12
|
16
|
+
Classifier: Programming Language :: Python :: 3.13
|
17
|
+
Classifier: Programming Language :: Python :: 3.14
|
18
|
+
Classifier: Programming Language :: Python :: 3 :: Only
|
19
|
+
Classifier: Topic :: Software Development
|
20
|
+
Requires-Python: >=3.10
|
21
|
+
Description-Content-Type: text/markdown
|
22
|
+
License-File: LICENSE
|
23
|
+
Requires-Dist: GitPython
|
24
|
+
Provides-Extra: test
|
25
|
+
Requires-Dist: hypothesis; extra == "test"
|
26
|
+
Requires-Dist: pytest; extra == "test"
|
27
|
+
Requires-Dist: requests; extra == "test"
|
28
|
+
Dynamic: license-file
|
29
|
+
|
30
|
+
# patch-fixer
|
31
|
+
So you asked an LLM to generate a code diff, tried to apply it with `git apply`, and got a bunch of malformed patch errors? Well fear no more, `patch-fixer` is here to save the day... more or less.
|
32
|
+
|
33
|
+
This tool can also split patches into separate files based on file lists, making it easy to selectively apply changes.
|
34
|
+
|
35
|
+
## Installation
|
36
|
+
```bash
|
37
|
+
# Make sure you're using at least python 3.10
|
38
|
+
python -m venv .venv/
|
39
|
+
source .venv/bin/activate
|
40
|
+
pip install patch-fixer
|
41
|
+
```
|
42
|
+
|
43
|
+
## Usage
|
44
|
+
|
45
|
+
### Command Line Interface
|
46
|
+
|
47
|
+
After installation, `patch-fixer` provides a unified command-line interface:
|
48
|
+
|
49
|
+
#### Fixing broken patches:
|
50
|
+
```bash
|
51
|
+
patch-fixer fix original broken.patch fixed.patch
|
52
|
+
```
|
53
|
+
where:
|
54
|
+
- `original` is the file or directory you were trying to patch
|
55
|
+
- `broken.patch` is the malformed patch generated by the LLM
|
56
|
+
- `fixed.patch` is the output file containing the (hopefully) fixed patch
|
57
|
+
|
58
|
+
#### Splitting patches by file:
|
59
|
+
```bash
|
60
|
+
# Split with files specified on command line
|
61
|
+
patch-fixer split input.patch included.patch excluded.patch -f file1.py file2.py
|
62
|
+
|
63
|
+
# Split using a file list
|
64
|
+
patch-fixer split input.patch included.patch excluded.patch -i files_to_include.txt
|
65
|
+
```
|
66
|
+
where:
|
67
|
+
- `input.patch` is the patch file to split
|
68
|
+
- `included.patch` will contain changes for the specified files
|
69
|
+
- `excluded.patch` will contain changes for all other files
|
70
|
+
- `-f` allows specifying files directly on the command line
|
71
|
+
- `-i` reads the file list from a text file (one file per line)
|
72
|
+
|
73
|
+
### Python API
|
74
|
+
|
75
|
+
#### Fixing patches:
|
76
|
+
```python
|
77
|
+
from patch_fixer import fix_patch
|
78
|
+
|
79
|
+
patch_file = "/path/to/broken.patch"
|
80
|
+
original = "/path/to/original/state" # file or directory being patched
|
81
|
+
with open(patch_file, encoding="utf-8") as f:
|
82
|
+
patch_lines = f.readlines()
|
83
|
+
|
84
|
+
fixed_lines = fix_patch(patch_lines, original)
|
85
|
+
output_file = "/path/to/fixed.patch"
|
86
|
+
|
87
|
+
with open(output_file, 'w', encoding='utf-8') as f:
|
88
|
+
f.writelines(fixed_lines)
|
89
|
+
```
|
90
|
+
|
91
|
+
#### Splitting patches:
|
92
|
+
```python
|
93
|
+
from patch_fixer import split_patch
|
94
|
+
|
95
|
+
with open("input.patch", encoding="utf-8") as f:
|
96
|
+
patch_lines = f.readlines()
|
97
|
+
|
98
|
+
# split to include only specific files
|
99
|
+
files_to_include = ["./src/main.py", "./src/utils.py"]
|
100
|
+
included, excluded = split_patch(patch_lines, files_to_include)
|
101
|
+
|
102
|
+
# write the split patches
|
103
|
+
with open("included.patch", 'w', encoding='utf-8') as f:
|
104
|
+
f.writelines(included)
|
105
|
+
|
106
|
+
with open("excluded.patch", 'w', encoding='utf-8') as f:
|
107
|
+
f.writelines(excluded)
|
108
|
+
```
|
109
|
+
|
110
|
+
## Known Limitations
|
111
|
+
|
112
|
+
- When fixing patches with missing `index` lines, the tool requires the files to be in a git repository to regenerate the index. This is only needed for file deletions and renames.
|
113
|
+
- `patch-fixer` assumes the patch follows git's unified diff format.
|
114
|
+
- The current implementation is not very robust to corrupted hunk content.
|
115
|
+
- Much more comprehensive fuzzy string matching is planned.
|
116
|
+
|
117
|
+
## Local Testing
|
118
|
+
```bash
|
119
|
+
git clone https://github.com/ajcm474/patch-fixer.git
|
120
|
+
cd patch-fixer
|
121
|
+
pip install -e .[test]
|
122
|
+
pytest
|
123
|
+
```
|
124
|
+
From version `0.3.0` onward (at least until version `1.0`), some test failures are expected
|
125
|
+
in bugfix versions as I like to use test-driven development to build out new features.
|
126
|
+
Please only report test failures if the same test existed and passed in the most recent `0.x.0` version.
|
127
|
+
|
128
|
+
## License
|
129
|
+
|
130
|
+
This is free and open source software, released under the Apache 2.0 License. See `LICENSE` for details.
|
@@ -0,0 +1,101 @@
|
|
1
|
+
# patch-fixer
|
2
|
+
So you asked an LLM to generate a code diff, tried to apply it with `git apply`, and got a bunch of malformed patch errors? Well fear no more, `patch-fixer` is here to save the day... more or less.
|
3
|
+
|
4
|
+
This tool can also split patches into separate files based on file lists, making it easy to selectively apply changes.
|
5
|
+
|
6
|
+
## Installation
|
7
|
+
```bash
|
8
|
+
# Make sure you're using at least python 3.10
|
9
|
+
python -m venv .venv/
|
10
|
+
source .venv/bin/activate
|
11
|
+
pip install patch-fixer
|
12
|
+
```
|
13
|
+
|
14
|
+
## Usage
|
15
|
+
|
16
|
+
### Command Line Interface
|
17
|
+
|
18
|
+
After installation, `patch-fixer` provides a unified command-line interface:
|
19
|
+
|
20
|
+
#### Fixing broken patches:
|
21
|
+
```bash
|
22
|
+
patch-fixer fix original broken.patch fixed.patch
|
23
|
+
```
|
24
|
+
where:
|
25
|
+
- `original` is the file or directory you were trying to patch
|
26
|
+
- `broken.patch` is the malformed patch generated by the LLM
|
27
|
+
- `fixed.patch` is the output file containing the (hopefully) fixed patch
|
28
|
+
|
29
|
+
#### Splitting patches by file:
|
30
|
+
```bash
|
31
|
+
# Split with files specified on command line
|
32
|
+
patch-fixer split input.patch included.patch excluded.patch -f file1.py file2.py
|
33
|
+
|
34
|
+
# Split using a file list
|
35
|
+
patch-fixer split input.patch included.patch excluded.patch -i files_to_include.txt
|
36
|
+
```
|
37
|
+
where:
|
38
|
+
- `input.patch` is the patch file to split
|
39
|
+
- `included.patch` will contain changes for the specified files
|
40
|
+
- `excluded.patch` will contain changes for all other files
|
41
|
+
- `-f` allows specifying files directly on the command line
|
42
|
+
- `-i` reads the file list from a text file (one file per line)
|
43
|
+
|
44
|
+
### Python API
|
45
|
+
|
46
|
+
#### Fixing patches:
|
47
|
+
```python
|
48
|
+
from patch_fixer import fix_patch
|
49
|
+
|
50
|
+
patch_file = "/path/to/broken.patch"
|
51
|
+
original = "/path/to/original/state" # file or directory being patched
|
52
|
+
with open(patch_file, encoding="utf-8") as f:
|
53
|
+
patch_lines = f.readlines()
|
54
|
+
|
55
|
+
fixed_lines = fix_patch(patch_lines, original)
|
56
|
+
output_file = "/path/to/fixed.patch"
|
57
|
+
|
58
|
+
with open(output_file, 'w', encoding='utf-8') as f:
|
59
|
+
f.writelines(fixed_lines)
|
60
|
+
```
|
61
|
+
|
62
|
+
#### Splitting patches:
|
63
|
+
```python
|
64
|
+
from patch_fixer import split_patch
|
65
|
+
|
66
|
+
with open("input.patch", encoding="utf-8") as f:
|
67
|
+
patch_lines = f.readlines()
|
68
|
+
|
69
|
+
# split to include only specific files
|
70
|
+
files_to_include = ["./src/main.py", "./src/utils.py"]
|
71
|
+
included, excluded = split_patch(patch_lines, files_to_include)
|
72
|
+
|
73
|
+
# write the split patches
|
74
|
+
with open("included.patch", 'w', encoding='utf-8') as f:
|
75
|
+
f.writelines(included)
|
76
|
+
|
77
|
+
with open("excluded.patch", 'w', encoding='utf-8') as f:
|
78
|
+
f.writelines(excluded)
|
79
|
+
```
|
80
|
+
|
81
|
+
## Known Limitations
|
82
|
+
|
83
|
+
- When fixing patches with missing `index` lines, the tool requires the files to be in a git repository to regenerate the index. This is only needed for file deletions and renames.
|
84
|
+
- `patch-fixer` assumes the patch follows git's unified diff format.
|
85
|
+
- The current implementation is not very robust to corrupted hunk content.
|
86
|
+
- Much more comprehensive fuzzy string matching is planned.
|
87
|
+
|
88
|
+
## Local Testing
|
89
|
+
```bash
|
90
|
+
git clone https://github.com/ajcm474/patch-fixer.git
|
91
|
+
cd patch-fixer
|
92
|
+
pip install -e .[test]
|
93
|
+
pytest
|
94
|
+
```
|
95
|
+
From version `0.3.0` onward (at least until version `1.0`), some test failures are expected
|
96
|
+
in bugfix versions as I like to use test-driven development to build out new features.
|
97
|
+
Please only report test failures if the same test existed and passed in the most recent `0.x.0` version.
|
98
|
+
|
99
|
+
## License
|
100
|
+
|
101
|
+
This is free and open source software, released under the Apache 2.0 License. See `LICENSE` for details.
|
@@ -0,0 +1,144 @@
|
|
1
|
+
#!/usr/bin/env python3
|
2
|
+
"""Command-line interface for patch-fixer."""
|
3
|
+
|
4
|
+
import argparse
|
5
|
+
import sys
|
6
|
+
from pathlib import Path
|
7
|
+
|
8
|
+
from .patch_fixer import fix_patch
|
9
|
+
from .split import split_patch
|
10
|
+
|
11
|
+
|
12
|
+
def fix_command(args):
    """Handle the ``fix`` subcommand.

    Reads the broken patch named by ``args.broken_patch``, repairs it against
    ``args.original`` with ``fix_patch``, and writes the result to
    ``args.output``.

    Args:
        args: Parsed CLI namespace providing ``original``, ``broken_patch``,
            ``output``, ``fuzzy`` and ``add_newline``.

    Returns:
        0 on success. Errors propagate as exceptions to the caller.
    """
    # fix_patch() changes the process working directory, so a relative output
    # path must be anchored to the caller's directory *before* it runs;
    # otherwise the fixed patch would be written relative to the patched tree.
    output_path = Path(args.output).absolute()

    with open(args.broken_patch, encoding='utf-8') as f:
        patch_lines = f.readlines()

    fixed_lines = fix_patch(
        patch_lines,
        args.original,
        fuzzy=args.fuzzy,
        add_newline=args.add_newline
    )

    with open(output_path, 'w', encoding='utf-8') as f:
        f.writelines(fixed_lines)

    # report the path exactly as the user typed it
    print(f"Fixed patch written to {args.output}")
    return 0
|
29
|
+
|
30
|
+
|
31
|
+
def split_command(args):
    """Handle the ``split`` subcommand.

    Loads the patch named by ``args.patch_file``, partitions it with
    ``split_patch`` according to the requested file list, and writes the two
    halves to ``args.included_output`` and ``args.excluded_output``.

    The file list comes from ``args.include_file`` (one name per line, blank
    lines ignored) when that option is set; otherwise from ``args.files``,
    falling back to an empty list.

    Returns:
        0 on success.
    """
    with open(args.patch_file, encoding='utf-8') as patch_handle:
        patch_lines = patch_handle.readlines()

    # an include-file, when given, takes precedence over names passed with -f
    if not args.include_file:
        files_to_include = args.files or []
    else:
        with open(args.include_file, encoding='utf-8') as list_handle:
            stripped_names = (raw.strip() for raw in list_handle)
            files_to_include = [name for name in stripped_names if name]

    included, excluded = split_patch(patch_lines, files_to_include)

    # write the included half first, then the excluded half
    outputs = (
        (args.included_output, included),
        (args.excluded_output, excluded),
    )
    for destination, lines in outputs:
        with open(destination, 'w', encoding='utf-8') as out_handle:
            out_handle.writelines(lines)

    print("Patch split into:")
    print(f" Included: {args.included_output} ({len(included)} lines)")
    print(f" Excluded: {args.excluded_output} ({len(excluded)} lines)")

    return 0
|
57
|
+
|
58
|
+
|
59
|
+
def _build_parser():
    """Build the ``patch-fixer`` argument parser with its ``fix`` and ``split`` subcommands."""
    parser = argparse.ArgumentParser(
        prog='patch-fixer',
        description='Fix broken git patches or split them by file lists.'
    )

    subparsers = parser.add_subparsers(dest='command', help='Available commands')

    # fix command
    fix_parser = subparsers.add_parser(
        'fix',
        help='Fix a broken patch file'
    )
    fix_parser.add_argument(
        'original',
        help='Original file or directory that the patch applies to'
    )
    fix_parser.add_argument(
        'broken_patch',
        help='Path to the broken patch file'
    )
    fix_parser.add_argument(
        'output',
        help='Path where the fixed patch will be written'
    )
    fix_parser.add_argument(
        '--fuzzy',
        action='store_true',
        help='Enable fuzzy string matching when finding hunks in original files'
    )
    fix_parser.add_argument(
        '--add-newline',
        action='store_true',
        help='Add final newline when processing "No newline at end of file" markers'
    )

    # split command
    split_parser = subparsers.add_parser(
        'split',
        help='Split a patch file based on file lists'
    )
    split_parser.add_argument(
        'patch_file',
        help='Path to the patch file to split'
    )
    split_parser.add_argument(
        'included_output',
        help='Output file for included files'
    )
    split_parser.add_argument(
        'excluded_output',
        help='Output file for excluded files'
    )
    split_parser.add_argument(
        '-f', '--files',
        nargs='*',
        help='Files to include (can specify multiple)'
    )
    split_parser.add_argument(
        '-i', '--include-file',
        help='File containing list of files to include (one per line)'
    )

    return parser


def main(argv=None):
    """Main entry point for the CLI.

    Args:
        argv: Optional list of argument strings (without the program name).
            When ``None`` (the default), argparse reads ``sys.argv[1:]``, so
            existing callers and the console-script entry point are unaffected.

    Returns:
        Process exit status: 0 on success, 1 when no subcommand was given or
        the selected command raised an exception. Invalid arguments make
        argparse raise ``SystemExit`` itself.
    """
    parser = _build_parser()
    args = parser.parse_args(argv)

    # subcommands are optional for argparse, so handle the bare invocation here
    if not args.command:
        parser.print_help()
        return 1

    # dispatch to appropriate command; this is the top-level boundary, so any
    # failure is reported on stderr and turned into a non-zero exit status
    try:
        if args.command == 'fix':
            return fix_command(args)
        elif args.command == 'split':
            return split_command(args)
    except Exception as e:
        print(f"Error: {e}", file=sys.stderr)
        return 1

    return 0
|
141
|
+
|
142
|
+
|
143
|
+
# Run the CLI when executed as a script, using main()'s return value as the exit status.
if __name__ == "__main__":
    raise SystemExit(main())
|
@@ -2,6 +2,7 @@
|
|
2
2
|
import os
|
3
3
|
import re
|
4
4
|
import sys
|
5
|
+
import warnings
|
5
6
|
from pathlib import Path
|
6
7
|
|
7
8
|
from git import Repo
|
@@ -61,7 +62,29 @@ def normalize_line(line):
|
|
61
62
|
return core + "\n"
|
62
63
|
|
63
64
|
|
64
|
-
def
|
65
|
+
def fuzzy_line_similarity(line1, line2, threshold=0.8):
    """Calculate similarity between two lines using a simple ratio.

    Both lines are stripped of surrounding whitespace, then compared as bags
    of characters: the score is ``2 * shared / (len(a) + len(b))``, where
    ``shared`` counts characters common to both lines (respecting
    multiplicity). Character order is not considered.

    Args:
        line1: First line to compare.
        line2: Second line to compare.
        threshold: Currently unused; kept so existing callers that pass it
            keep working.

    Returns:
        A float in ``[0.0, 1.0]``: 1.0 when the stripped lines are equal, 0.0
        when either input is empty/None or exactly one of them is blank after
        stripping.
    """
    from collections import Counter

    if not line1 or not line2:
        return 0.0

    l1, l2 = line1.strip(), line2.strip()

    # equality is checked first, so two whitespace-only lines count as identical
    if l1 == l2:
        return 1.0

    if not l1 or not l2:
        return 0.0

    # multiset intersection keeps min(count) per shared character
    common = sum((Counter(l1) & Counter(l2)).values())

    # both stripped lines are non-empty here, so the denominator is positive
    return (2.0 * common) / (len(l1) + len(l2))
|
85
|
+
|
86
|
+
|
87
|
+
def find_hunk_start(context_lines, original_lines, fuzzy=False):
|
65
88
|
"""Search original_lines for context_lines and return start line index (0-based)."""
|
66
89
|
ctx = []
|
67
90
|
for line in context_lines:
|
@@ -74,11 +97,33 @@ def find_hunk_start(context_lines, original_lines):
|
|
74
97
|
ctx.append(line)
|
75
98
|
if not ctx:
|
76
99
|
raise ValueError("Cannot search for empty hunk.")
|
100
|
+
|
101
|
+
# first try exact matching
|
77
102
|
for i in range(len(original_lines) - len(ctx) + 1):
|
78
103
|
# this part will fail if the diff is malformed beyond hunk header
|
79
|
-
equal_lines = [original_lines[i+j].strip() == ctx[j].strip() for j in range(len(ctx))]
|
104
|
+
equal_lines = [original_lines[i + j].strip() == ctx[j].strip() for j in range(len(ctx))]
|
80
105
|
if all(equal_lines):
|
81
106
|
return i
|
107
|
+
|
108
|
+
# if fuzzy matching is enabled and exact match failed, try fuzzy match
|
109
|
+
if fuzzy:
|
110
|
+
best_match_score = 0.0
|
111
|
+
best_match_pos = 0
|
112
|
+
|
113
|
+
for i in range(len(original_lines) - len(ctx) + 1):
|
114
|
+
total_similarity = 0.0
|
115
|
+
for j in range(len(ctx)):
|
116
|
+
similarity = fuzzy_line_similarity(original_lines[i + j], ctx[j])
|
117
|
+
total_similarity += similarity
|
118
|
+
|
119
|
+
avg_similarity = total_similarity / len(ctx)
|
120
|
+
if avg_similarity > best_match_score and avg_similarity > 0.6:
|
121
|
+
best_match_score = avg_similarity
|
122
|
+
best_match_pos = i
|
123
|
+
|
124
|
+
if best_match_score > 0.6:
|
125
|
+
return best_match_pos
|
126
|
+
|
82
127
|
return 0
|
83
128
|
|
84
129
|
|
@@ -111,14 +156,14 @@ def reconstruct_file_header(diff_line, header_type):
|
|
111
156
|
raise ValueError(f"Unsupported header type: {header_type}")
|
112
157
|
|
113
158
|
|
114
|
-
def capture_hunk(current_hunk, original_lines, offset, last_hunk, hunk_context):
|
159
|
+
def capture_hunk(current_hunk, original_lines, offset, last_hunk, hunk_context, fuzzy=False):
|
115
160
|
# compute line counts
|
116
161
|
old_count = sum(1 for l in current_hunk if l.startswith((' ', '-')))
|
117
162
|
new_count = sum(1 for l in current_hunk if l.startswith((' ', '+')))
|
118
163
|
|
119
164
|
if old_count > 0:
|
120
165
|
# compute starting line in original file
|
121
|
-
old_start = find_hunk_start(current_hunk, original_lines) + 1
|
166
|
+
old_start = find_hunk_start(current_hunk, original_lines, fuzzy=fuzzy) + 1
|
122
167
|
|
123
168
|
# if the line number descends, we either have a bad match or a new file
|
124
169
|
if old_start < last_hunk:
|
@@ -147,7 +192,11 @@ def capture_hunk(current_hunk, original_lines, offset, last_hunk, hunk_context):
|
|
147
192
|
|
148
193
|
def regenerate_index(old_path, new_path, cur_dir):
|
149
194
|
repo = Repo(cur_dir)
|
150
|
-
|
195
|
+
|
196
|
+
# Common git file modes: 100644 (regular file), 100755 (executable file),
|
197
|
+
# 120000 (symbolic link), 160000 (submodule), 040000 (tree/directory)
|
198
|
+
# TODO: guess mode based on above information
|
199
|
+
mode = " 100644"
|
151
200
|
|
152
201
|
# file deletion
|
153
202
|
if new_path == "/dev/null":
|
@@ -164,12 +213,15 @@ def regenerate_index(old_path, new_path, cur_dir):
|
|
164
213
|
return f"index {old_sha}..{new_sha}{mode}\n"
|
165
214
|
|
166
215
|
|
167
|
-
def fix_patch(patch_lines, original, remove_binary=False):
|
216
|
+
def fix_patch(patch_lines, original, remove_binary=False, fuzzy=False, add_newline=False):
|
168
217
|
dir_mode = os.path.isdir(original)
|
169
218
|
original_path = Path(original).absolute()
|
170
219
|
|
171
220
|
# make relative paths in the diff work
|
172
|
-
|
221
|
+
if dir_mode:
|
222
|
+
os.chdir(original_path)
|
223
|
+
else:
|
224
|
+
os.chdir(original_path.parent)
|
173
225
|
|
174
226
|
fixed_lines = []
|
175
227
|
current_hunk = []
|
@@ -201,7 +253,7 @@ def fix_patch(patch_lines, original, remove_binary=False):
|
|
201
253
|
fixed_header,
|
202
254
|
offset,
|
203
255
|
last_hunk
|
204
|
-
) = capture_hunk(current_hunk, original_lines, offset, last_hunk, hunk_context)
|
256
|
+
) = capture_hunk(current_hunk, original_lines, offset, last_hunk, hunk_context, fuzzy=fuzzy)
|
205
257
|
except MissingHunkError:
|
206
258
|
raise NotImplementedError(f"Could not find hunk in {current_file}:"
|
207
259
|
f"\n\n{''.join(current_hunk)}")
|
@@ -224,7 +276,12 @@ def fix_patch(patch_lines, original, remove_binary=False):
|
|
224
276
|
last_mode = i
|
225
277
|
fixed_lines.append(normalize_line(line))
|
226
278
|
case "INDEX_LINE":
|
227
|
-
#
|
279
|
+
# mode should be present in index line for all operations except file deletion
|
280
|
+
# for deletions, the mode is omitted since the file no longer exists
|
281
|
+
index_line = normalize_line(line).strip()
|
282
|
+
if not index_line.endswith("..0000000") and not re.search(r' [0-7]{6}$', index_line):
|
283
|
+
# TODO: this is the right idea, but a poor implementation
|
284
|
+
pass
|
228
285
|
last_index = i
|
229
286
|
similarity_index = match_groups[0]
|
230
287
|
if similarity_index:
|
@@ -238,7 +295,9 @@ def fix_patch(patch_lines, original, remove_binary=False):
|
|
238
295
|
fixed_lines.append(normalize_line(line))
|
239
296
|
case "RENAME_FROM":
|
240
297
|
if not look_for_rename:
|
241
|
-
|
298
|
+
# handle case where rename from appears without corresponding index line
|
299
|
+
# this may indicate a malformed patch, but we can try to continue
|
300
|
+
warnings.warn(f"Warning: 'rename from' found without expected index line at line {i+1}")
|
242
301
|
if binary_file:
|
243
302
|
raise NotImplementedError("Renaming binary files not yet supported")
|
244
303
|
if last_index != i - 1:
|
@@ -252,7 +311,10 @@ def fix_patch(patch_lines, original, remove_binary=False):
|
|
252
311
|
offset = 0
|
253
312
|
last_hunk = 0
|
254
313
|
if not Path.exists(current_path):
|
255
|
-
#
|
314
|
+
# this is meant to handle cases where the source file
|
315
|
+
# doesn't exist (e.g., when applying a patch that renames
|
316
|
+
# a file created earlier in the same patch)
|
317
|
+
# TODO: but really, does that ever happen???
|
256
318
|
fixed_lines.append(normalize_line(line))
|
257
319
|
look_for_rename = True
|
258
320
|
file_loaded = False
|
@@ -273,7 +335,12 @@ def fix_patch(patch_lines, original, remove_binary=False):
|
|
273
335
|
last_index = i - 2
|
274
336
|
else:
|
275
337
|
raise NotImplementedError("Missing `rename from` header not yet supported.")
|
276
|
-
|
338
|
+
if not look_for_rename:
|
339
|
+
# if we're not looking for a rename but encounter "rename to",
|
340
|
+
# this indicates a malformed patch - log warning but continue
|
341
|
+
warnings.warn(
|
342
|
+
f"Warning: unexpected 'rename to' found at line {i + 1} without corresponding 'rename from'"
|
343
|
+
)
|
277
344
|
current_file = match_groups[0]
|
278
345
|
current_path = Path(current_file).absolute()
|
279
346
|
if current_file and current_path.is_dir():
|
@@ -412,7 +479,7 @@ def fix_patch(patch_lines, original, remove_binary=False):
|
|
412
479
|
fixed_header,
|
413
480
|
offset,
|
414
481
|
last_hunk
|
415
|
-
) = capture_hunk(current_hunk, original_lines, offset, last_hunk, hunk_context)
|
482
|
+
) = capture_hunk(current_hunk, original_lines, offset, last_hunk, hunk_context, fuzzy=fuzzy)
|
416
483
|
except MissingHunkError:
|
417
484
|
raise NotImplementedError(f"Could not find hunk in {current_file}:"
|
418
485
|
f"\n\n{''.join(current_hunk)}")
|
@@ -421,10 +488,13 @@ def fix_patch(patch_lines, original, remove_binary=False):
|
|
421
488
|
current_hunk = []
|
422
489
|
hunk_context = match_groups[4]
|
423
490
|
case "END_LINE":
|
424
|
-
#
|
425
|
-
|
491
|
+
# if user requested, add a newline at end of file when this marker is present
|
492
|
+
if add_newline:
|
493
|
+
fixed_lines.append("\n")
|
494
|
+
else:
|
495
|
+
fixed_lines.append(normalize_line(line))
|
426
496
|
case _:
|
427
|
-
# TODO: fuzzy string matching
|
497
|
+
# TODO: fix fuzzy string matching to be less granular
|
428
498
|
# this is a normal line, add to current hunk
|
429
499
|
current_hunk.append(normalize_line(line))
|
430
500
|
|
@@ -434,7 +504,7 @@ def fix_patch(patch_lines, original, remove_binary=False):
|
|
434
504
|
fixed_header,
|
435
505
|
offset,
|
436
506
|
last_hunk
|
437
|
-
) = capture_hunk(current_hunk, original_lines, offset, last_hunk, hunk_context)
|
507
|
+
) = capture_hunk(current_hunk, original_lines, offset, last_hunk, hunk_context, fuzzy=fuzzy)
|
438
508
|
except MissingHunkError:
|
439
509
|
raise NotImplementedError(f"Could not find hunk in {current_file}:"
|
440
510
|
f"\n\n{''.join(current_hunk)}")
|