PyPI - patch-fixer - Versions diffs - 0.2.3__tar.gz → 0.3.0__tar.gz - Mend

patch-fixer 0.2.3__tar.gz → 0.3.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (13) hide show

{patch_fixer-0.2.3 → patch_fixer-0.3.0}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: patch-fixer
-Version: 0.2.3
+Version: 0.3.0
 Summary: Fixes erroneous git apply patches to the best of its ability.
 Maintainer-email: Alex Mueller <amueller474@gmail.com>
 License-Expression: Apache-2.0
@@ -69,8 +69,9 @@ cd patch-fixer
 pip install -e .[test]
 pytest
 ```
-Note that some test failures are expected as this project is in the early alpha stage.
-Please only report test failures if the same test passed in a previous version.
+From version `0.3.0` onward (at least until version `1.0`), some test failures are expected
+in bugfix versions as I like to use test-driven development to build out new features.
+Please only report test failures if the same test passed in the most recent `0.x.0` version.
 ## License

{patch_fixer-0.2.3 → patch_fixer-0.3.0}/README.md RENAMED Viewed

@@ -41,8 +41,9 @@ cd patch-fixer
 pip install -e .[test]
 pytest
 ```
-Note that some test failures are expected as this project is in the early alpha stage.
-Please only report test failures if the same test passed in a previous version.
+From version `0.3.0` onward (at least until version `1.0`), some test failures are expected
+in bugfix versions as I like to use test-driven development to build out new features.
+Please only report test failures if the same test passed in the most recent `0.x.0` version.
 ## License

{patch_fixer-0.2.3 → patch_fixer-0.3.0}/patch_fixer/patch_fixer.py RENAMED Viewed

@@ -6,16 +6,16 @@ from pathlib import Path
 from git import Repo
-path_regex = r'(?:/[A-Za-z0-9_.-]+)*'
+path_regex = r'(?:[A-Za-z0-9_.-]+/?)+'
 regexes = {
-    "DIFF_LINE": re.compile(rf'diff --git (a{path_regex}+) (b{path_regex}+)'),
+    "DIFF_LINE": re.compile(rf'diff --git (a/{path_regex}) (b/{path_regex})'),
     "MODE_LINE": re.compile(r'(new|deleted) file mode [0-7]{6}'),
     "INDEX_LINE": re.compile(r'index [0-9a-f]{7,64}\.\.[0-9a-f]{7,64}(?: [0-7]{6})?|similarity index ([0-9]+)%'),
-    "BINARY_LINE": re.compile(rf'Binary files (a{path_regex}+|/dev/null) and (b{path_regex}+|/dev/null) differ'),
+    "BINARY_LINE": re.compile(rf'Binary files (a/{path_regex}|/dev/null) and (b/{path_regex}|/dev/null) differ'),
     "RENAME_FROM": re.compile(rf'rename from ({path_regex})'),
     "RENAME_TO": re.compile(rf'rename to ({path_regex})'),
-    "FILE_HEADER_START": re.compile(rf'--- (a{path_regex}+|/dev/null)'),
-    "FILE_HEADER_END": re.compile(rf'\+\+\+ (b{path_regex}+|/dev/null)'),
+    "FILE_HEADER_START": re.compile(rf'--- (a/{path_regex}|/dev/null)'),
+    "FILE_HEADER_END": re.compile(rf'\+\+\+ (b/{path_regex}|/dev/null)'),
     "HUNK_HEADER": re.compile(r'^@@ -(\d+),(\d+) \+(\d+),(\d+) @@(.*)$'),
     "END_LINE": re.compile(r'\\ No newline at end of file')
 }
@@ -26,12 +26,9 @@ class MissingHunkError(Exception):
 def normalize_line(line):
-    if line.startswith('+'):
-        # safe to normalize new content
-        return '+' + line[1:].rstrip() + "\n"
-    else:
-        # preserve exactly (only normalize line endings)
-        return line.rstrip("\r\n") + "\n"
+    # preserve whitespace, only normalize line endings
+    return line.rstrip("\r\n") + "\n"
 def find_hunk_start(context_lines, original_lines):
     """Search original_lines for context_lines and return start line index (0-based)."""
@@ -40,7 +37,8 @@ def find_hunk_start(context_lines, original_lines):
         if line.startswith(" "):
             ctx.append(line.lstrip(" "))
         elif line.startswith("-"):
-            ctx.append(line.lstrip("-"))
+            # can't use lstrip; we want to keep other dashes in the line
+            ctx.append(line[1:])
         elif line.isspace() or line == "":
             ctx.append(line)
     if not ctx:
@@ -75,9 +73,9 @@ def reconstruct_file_header(diff_line, header_type):
     a, b = diff_groups
     match header_type:
         case "FILE_HEADER_START":
-            return f"--- {a}"
+            return f"--- {a}\n"
         case "FILE_HEADER_END":
-            return f"+++ {b}"
+            return f"+++ {b}\n"
         case _:
             raise ValueError(f"Unsupported header type: {header_type}")
@@ -87,14 +85,24 @@ def capture_hunk(current_hunk, original_lines, offset, last_hunk, hunk_context):
     old_count = sum(1 for l in current_hunk if l.startswith((' ', '-')))
     new_count = sum(1 for l in current_hunk if l.startswith((' ', '+')))
-    # compute starting line in original file
-    old_start = find_hunk_start(current_hunk, original_lines) + 1
-    # if the line number descends, we either have a bad match or a new file
-    if old_start < last_hunk:
-        raise MissingHunkError
+    if old_count > 0:
+        # compute starting line in original file
+        old_start = find_hunk_start(current_hunk, original_lines) + 1
+        # if the line number descends, we either have a bad match or a new file
+        if old_start < last_hunk:
+            raise MissingHunkError
+        else:
+            if new_count == 0:
+                # complete deletion of remaining content
+                new_start = 0
+            else:
+                new_start = old_start + offset
     else:
-        new_start = old_start + offset
+        # old count of zero can only mean file creation, since adding lines to
+        # an existing file requires surrounding context lines without a +
+        old_start = 0
+        new_start = 1   # line numbers are 1-indexed in the real world
     offset += (new_count - old_count)
@@ -122,10 +130,10 @@ def regenerate_index(old_path, new_path, cur_dir):
             "as this would require manually applying the patch first."
         )
-    return f"index {old_sha}..{new_sha}{mode}"
+    return f"index {old_sha}..{new_sha}{mode}\n"
-def fix_patch(patch_lines, original):
+def fix_patch(patch_lines, original, remove_binary=False):
     dir_mode = os.path.isdir(original)
     original_path = Path(original).absolute()
@@ -146,7 +154,10 @@ def fix_patch(patch_lines, original):
     look_for_rename = False
     similarity_index = None
     missing_index = False
+    binary_file = False
     hunk_context = ""
+    original_lines = []
+    file_loaded = False
     for i, line in enumerate(patch_lines):
         match_groups, line_type = match_line(line)
@@ -168,12 +179,14 @@ def fix_patch(patch_lines, original):
                     current_hunk = []
                 a, b = split_ab(match_groups)
                 if a != b:
-                    raise ValueError(f"Diff paths do not match: \n{a}\n{b}")
+                    look_for_rename = True
                 fixed_lines.append(normalize_line(line))
                 last_diff = i
                 file_start_header = False
                 file_end_header = False
                 first_hunk = True
+                binary_file = False
+                file_loaded = False
             case "MODE_LINE":
                 if last_diff != i - 1:
                     raise NotImplementedError("Missing diff line not yet supported")
@@ -188,14 +201,18 @@ def fix_patch(patch_lines, original):
                 fixed_lines.append(normalize_line(line))
                 missing_index = False
             case "BINARY_LINE":
-                raise NotImplementedError("Binary files not supported yet")
+                if remove_binary:
+                    raise NotImplementedError("Ignoring binary files not yet supported")
+                binary_file = True
+                fixed_lines.append(normalize_line(line))
             case "RENAME_FROM":
                 if not look_for_rename:
                     pass    # TODO: handle missing index line
+                if binary_file:
+                    raise NotImplementedError("Renaming binary files not yet supported")
                 if last_index != i - 1:
                     missing_index = True    # need this for existence check in RENAME_TO block
-                    similarity_index = 100  # TODO: is this a dangerous assumption?
-                    fixed_index = "similarity index 100%"
+                    fixed_index = "similarity index 100%\n"
                     fixed_lines.append(normalize_line(fixed_index))
                     last_index = i - 1
                 look_for_rename = False
@@ -204,16 +221,18 @@ def fix_patch(patch_lines, original):
                 offset = 0
                 last_hunk = 0
                 if not Path.exists(current_path):
-                    if similarity_index == 100:
-                        fixed_lines.append(normalize_line(line))
-                        look_for_rename = True
-                        continue
-                    raise NotImplementedError("Parsing files that were both renamed and modified is not yet supported.")
+                    # TODO: verify whether this block is necessary at all
+                    fixed_lines.append(normalize_line(line))
+                    look_for_rename = True
+                    file_loaded = False
+                    continue
+                if not current_path.is_file():
+                    raise IsADirectoryError(f"Rename from header points to a directory, not a file: {current_file}")
                 if dir_mode or current_path == original_path:
                     with open(current_path, encoding='utf-8') as f:
                         original_lines = [l.rstrip('\n') for l in f.readlines()]
                     fixed_lines.append(normalize_line(line))
-                    # TODO: analogous boolean to `file_start_header`?
+                    file_loaded = True
                 else:
                     raise FileNotFoundError(f"Filename {current_file} in `rename from` header does not match argument {original}")
             case "RENAME_TO":
@@ -223,18 +242,20 @@ def fix_patch(patch_lines, original):
                         last_index = i - 2
                     else:
                         raise NotImplementedError("Missing `rename from` header not yet supported.")
-                if look_for_rename:
-                    # the old file doesn't exist, so we need to read this one
-                    current_file = match_groups[0]
-                    current_path = Path(current_file).absolute()
-                    with open(current_path, encoding='utf-8') as f:
-                        original_lines = [l.rstrip('\n') for l in f.readlines()]
-                    fixed_lines.append(normalize_line(line))
-                    look_for_rename = False
-                pass
+                # TODO: do something sensible if `look_for_rename` is false
+                current_file = match_groups[0]
+                current_path = Path(current_file).absolute()
+                if current_file and current_path.is_dir():
+                    raise IsADirectoryError(f"rename to points to a directory, not a file: {current_file}")
+                fixed_lines.append(normalize_line(line))
+                look_for_rename = False
             case "FILE_HEADER_START":
                 if look_for_rename:
                     raise NotImplementedError("Replacing file header with rename not yet supported.")
+                if binary_file:
+                    raise NotImplementedError("A header block with both 'binary files differ' and "
+                                              "file start/end headers is a confusing state"
+                                              "\nfrom which there is no obvious way to recover.")
                 if last_index != i - 1:
                     missing_index = True
                     last_index = i - 1
@@ -242,13 +263,15 @@ def fix_patch(patch_lines, original):
                 if current_file and not dir_mode:
                     raise ValueError("Diff references multiple files but only one provided.")
                 current_file = match_groups[0]
-                offset = 0
-                last_hunk = 0
+                if not file_loaded:
+                    offset = 0
+                    last_hunk = 0
                 if current_file == "/dev/null":
                     if last_diff > last_mode:
                         raise NotImplementedError("Missing mode line not yet supported")
                     fixed_lines.append(normalize_line(line))
                     file_start_header = True
+                    file_loaded = False
                     continue
                 if current_file.startswith("a/"):
                     current_file = current_file[2:]
@@ -257,16 +280,24 @@ def fix_patch(patch_lines, original):
                 current_path = Path(current_file).absolute()
                 if not current_path.exists():
                     raise FileNotFoundError(f"File header start points to non-existent file: {current_file}")
-                if dir_mode or Path(current_file) == Path(original):
-                    with open(current_file, encoding='utf-8') as f:
-                        original_lines = [l.rstrip('\n') for l in f.readlines()]
-                    fixed_lines.append(normalize_line(line))
-                    file_start_header = True
-                else:
-                    raise FileNotFoundError(f"Filename {current_file} in header does not match argument {original}")
+                if not current_path.is_file():
+                    raise IsADirectoryError(f"File header start points to a directory, not a file: {current_file}")
+                if not file_loaded:
+                    if dir_mode or Path(current_file) == Path(original):
+                        with open(current_file, encoding='utf-8') as f:
+                            original_lines = [l.rstrip('\n') for l in f.readlines()]
+                        file_loaded = True
+                    else:
+                        raise FileNotFoundError(f"Filename {current_file} in header does not match argument {original}")
+                fixed_lines.append(normalize_line(line))
+                file_start_header = True
             case "FILE_HEADER_END":
                 if look_for_rename:
                     raise NotImplementedError("Replacing file header with rename not yet supported.")
+                if binary_file:
+                    raise NotImplementedError("A header block with both 'binary files differ' and "
+                                              "file start/end headers is a confusing state"
+                                              "\nfrom which there is no obvious way to recover.")
                 dest_file = match_groups[0]
                 dest_path = Path(dest_file).absolute()
                 if dest_file.startswith("b/"):
@@ -286,37 +317,47 @@ def fix_patch(patch_lines, original):
                     else:
                         # reconstruct file start header based on end header
                         a = match_groups[0].replace("b", "a")
-                        fixed_lines.append(normalize_line(f"--- {a}"))
+                        fixed_lines.append(normalize_line(f"--- {a}\n"))
                     file_start_header = True
                 elif current_file == "/dev/null":
                     if dest_file == "/dev/null":
                         raise ValueError("File headers cannot both be /dev/null")
-                    elif not dest_path.exists():
-                        raise FileNotFoundError(f"File header end points to non-existent file: {dest_file}")
+                    elif dest_path.exists():
+                        raise FileExistsError(f"File header start /dev/null implies file creation, "
+                                              f"but file header end would overwrite existing file: {dest_file}")
                     current_file = dest_file
                     current_path = Path(current_file).absolute()
                     if dir_mode or current_path == original_path:
-                        # TODO: in dir mode, verify that current file exists in original path
-                        with open(current_path, encoding='utf-8') as f:
-                            original_lines = [l.rstrip('\n') for l in f.readlines()]
+                        original_lines = []
                         fixed_lines.append(normalize_line(line))
                         file_end_header = True
                     else:
                         raise FileNotFoundError(f"Filename {current_file} in header does not match argument {original}")
                 elif dest_file == "/dev/null":
-                    # TODO: check if other modes are possible
-                    if last_mode < last_diff:
-                        last_mode = last_diff + 1
-                        fixed_lines.insert(last_mode, "deleted file mode 100644")
-                        last_index += 1     # index comes after mode
+                    current_path = Path(current_file).absolute()
+                    if not current_path.exists():
+                        raise FileNotFoundError(f"The file being 'deleted' does not exist: {current_file}")
+                    if last_mode <= last_diff:
+                        fixed_lines.insert(last_diff + 1, "deleted file mode 100644\n")
+                        last_index += 1
                     elif "deleted" not in fixed_lines[last_mode]:
-                        fixed_lines[last_mode] = "deleted file mode 100644"
-                    else:
-                        fixed_lines.append("deleted file mode 100644")
+                        fixed_lines[last_mode] = "deleted file mode 100644\n"
+                    fixed_lines.append(normalize_line(line))
+                    file_end_header = True
                 elif current_file != dest_file:
-                    raise ValueError(f"File headers do not match: \n{current_file}\n{dest_file}")
-                pass
+                    # this is a rename, original_lines is already set from FILE_HEADER_START
+                    fixed_lines.append(normalize_line(line))
+                    file_end_header = True
+                    first_hunk = True
+                else:
+                    fixed_lines.append(normalize_line(line))
+                    file_end_header = True
             case "HUNK_HEADER":
+                if binary_file:
+                    raise ValueError("Binary file can't have a hunk header.")
+                if look_for_rename:
+                    raise ValueError(f"Rename header expected but not found.\n"
+                                     f"Hint: look at lines {last_diff}-{i} of the input patch.")
                 # fix missing file headers before capturing the hunk
                 if not file_end_header:
                     diff_line = patch_lines[last_diff]
@@ -370,7 +411,7 @@ def fix_patch(patch_lines, original):
     fixed_lines.extend(current_hunk)
     # if original file didn't end with a newline, strip out the newline here
-    if not original_lines[-1].endswith("\n"):
+    if original_lines and not original_lines[-1].endswith("\n"):
         fixed_lines[-1] = fixed_lines[-1].rstrip("\n")
     return fixed_lines
@@ -395,6 +436,7 @@ def main():
     print(f"Fixed patch written to {output_file}")
 if __name__ == "__main__":
     main()

{patch_fixer-0.2.3 → patch_fixer-0.3.0}/patch_fixer.egg-info/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: patch-fixer
-Version: 0.2.3
+Version: 0.3.0
 Summary: Fixes erroneous git apply patches to the best of its ability.
 Maintainer-email: Alex Mueller <amueller474@gmail.com>
 License-Expression: Apache-2.0
@@ -69,8 +69,9 @@ cd patch-fixer
 pip install -e .[test]
 pytest
 ```
-Note that some test failures are expected as this project is in the early alpha stage.
-Please only report test failures if the same test passed in a previous version.
+From version `0.3.0` onward (at least until version `1.0`), some test failures are expected
+in bugfix versions as I like to use test-driven development to build out new features.
+Please only report test failures if the same test passed in the most recent `0.x.0` version.
 ## License

{patch_fixer-0.2.3 → patch_fixer-0.3.0}/pyproject.toml RENAMED Viewed

@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
 [project]
 name = "patch-fixer"
-version = "0.2.3"
+version = "0.3.0"
 description = "Fixes erroneous git apply patches to the best of its ability."
 maintainers = [
     {name = "Alex Mueller", email="amueller474@gmail.com"},

{patch_fixer-0.2.3 → patch_fixer-0.3.0}/tests/test_repos.py RENAMED Viewed

@@ -36,6 +36,7 @@ REPOS = {
 CACHE_DIR = Path.home() / ".patch-testing"
 class DeletedBranchError(ValueError):
     def __init__(self, commit_hash):
         self.commit_hash = commit_hash
@@ -47,7 +48,7 @@ def verify_commit_exists(repo: Repo, commit_hash: str) -> None:
     try:
         repo.commit(commit_hash)
     except ValueError:
-        # Commit belongs to a deleted branch (let caller handle it)
+        # commit belongs to a deleted branch (let caller handle it)
         raise DeletedBranchError(commit_hash)
@@ -64,13 +65,13 @@ def download_commit_zip(repo_url, commit_hash: str, dest_path: Path) -> None:
         print(f"Failed to download commit snapshot: {e}")
         sys.exit(1)
-    # Extract the zip into dest_path
+    # extract the zip into dest_path
     with zipfile.ZipFile(io.BytesIO(r.content)) as z:
         # GitHub wraps contents in a top-level folder named like repo-<hash>
         top_level = z.namelist()[0].split("/")[0]
         z.extractall(dest_path.parent)
-        # Move extracted folder to dest_path
+        # move extracted folder to dest_path
         extracted_path = dest_path.parent / top_level
         if dest_path.exists():
             shutil.rmtree(dest_path)
@@ -111,7 +112,7 @@ def clone_repos(repo_group, repo_name, old_commit, new_commit):
             # no sense keeping around an object that points to HEAD
             repo_new = Repo(repo_new_path)
-        # Prevent downloading the repo twice if we can help it
+        # prevent downloading the repo twice if we can help it
         shutil.copytree(repo_new_path, repo_old_path)
         repo_old = Repo(repo_old_path)
         try:
@@ -137,7 +138,7 @@ def test_integration_equality(repo_group, repo_name, old_commit, new_commit):
     ) = clone_repos(repo_group, repo_name, old_commit, new_commit)
     expected = repo_new.git.diff(old_commit, new_commit)
-    input_lines = expected.splitlines()
+    input_lines = expected.splitlines(keepends=True)
     fixed_lines = fix_patch(input_lines, repo_old_path)
     actual = "".join(fixed_lines)

{patch_fixer-0.2.3 → patch_fixer-0.3.0}/LICENSE RENAMED Viewed

File without changes

{patch_fixer-0.2.3 → patch_fixer-0.3.0}/patch_fixer/__init__.py RENAMED Viewed

File without changes

{patch_fixer-0.2.3 → patch_fixer-0.3.0}/patch_fixer.egg-info/SOURCES.txt RENAMED Viewed

File without changes

{patch_fixer-0.2.3 → patch_fixer-0.3.0}/patch_fixer.egg-info/dependency_links.txt RENAMED Viewed

File without changes

{patch_fixer-0.2.3 → patch_fixer-0.3.0}/patch_fixer.egg-info/requires.txt RENAMED Viewed

File without changes

{patch_fixer-0.2.3 → patch_fixer-0.3.0}/patch_fixer.egg-info/top_level.txt RENAMED Viewed

File without changes

{patch_fixer-0.2.3 → patch_fixer-0.3.0}/setup.cfg RENAMED Viewed

File without changes

patch-fixer 0.2.3__tar.gz → 0.3.0__tar.gz

patch-fixer 0.2.3__tar.gz → 0.3.0__tar.gz