PyPI - patchdiff - Versions diffs - 0.3.5__tar.gz → 0.3.6__tar.gz - Mend

patchdiff 0.3.5.tar.gz → 0.3.6.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (19) hide show

patchdiff-0.3.6/.github/workflows/benchmark.yml ADDED Viewed

@@ -0,0 +1,69 @@
+# Benchmark workflow: runs the pytest-benchmark suite for pushes to master and
+# for pull requests that target master.
+name: Benchmarks
+on:
+  push:
+    branches:
+      - master
+  pull_request:
+    branches:
+      - master
+jobs:
+  benchmark:
+    name: Benchmarks
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v5
+      - name: Install uv
+        uses: astral-sh/setup-uv@v6
+      - name: Set up Python 3.14
+        uses: actions/setup-python@v5
+        with:
+          python-version: '3.14'
+      - name: Install dependencies
+        run: uv sync
+      # Restore benchmark baseline (read-only for PRs)
+      - name: Restore benchmark baseline
+        uses: actions/cache/restore@v4
+        with:
+          path: .benchmarks
+          key: benchmark-baseline-3.14-${{ runner.os }}
+      # On master: save baseline results
+      - name: Run benchmarks and save baseline
+        if: github.ref == 'refs/heads/master'
+        # A failing benchmark run does not stop the following steps
+        continue-on-error: true
+        run: |
+          uv run --no-sync pytest benchmarks/benchmark.py \
+            --benchmark-only \
+            --benchmark-autosave \
+            --benchmark-sort=name
+      # On master: cache the new baseline results
+      # NOTE(review): this saves under the same key that was just restored;
+      # GitHub cache entries are immutable, so confirm that an existing
+      # baseline is actually refreshed by this step.
+      - name: Save benchmark baseline
+        if: github.ref == 'refs/heads/master'
+        uses: actions/cache/save@v4
+        with:
+          path: .benchmarks
+          key: benchmark-baseline-3.14-${{ runner.os }}
+      # On PRs: compare against baseline and fail if degraded
+      # When no saved runs are listed the suite runs without a comparison;
+      # otherwise a mean regression of more than 5% fails the step.
+      - name: Run benchmarks and compare
+        if: github.event_name == 'pull_request'
+        run: |
+          if [ -z "$(uv run --no-sync pytest-benchmark list)" ]; then
+            echo "No baseline found, not comparing"
+            uv run --no-sync pytest -v benchmarks/benchmark.py
+            exit
+          fi
+          uv run --no-sync pytest benchmarks/benchmark.py \
+              --benchmark-only \
+              --benchmark-compare \
+              --benchmark-compare-fail=mean:5% \
+              --benchmark-sort=name

{patchdiff-0.3.5 → patchdiff-0.3.6}/.gitignore RENAMED Viewed

@@ -4,3 +4,4 @@ __pycache__
 .coverage
 dist
 uv.lock
+.benchmarks/

{patchdiff-0.3.5 → patchdiff-0.3.6}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: patchdiff
-Version: 0.3.5
+Version: 0.3.6
 Summary: MIT
 Project-URL: Homepage, https://github.com/fork-tongue/patchdiff
 Author-email: Korijn van Golen <korijn@gmail.com>, Berend Klein Haneveld <berendkleinhaneveld@gmail.com>

patchdiff-0.3.6/benchmarks/benchmark.py ADDED Viewed

@@ -0,0 +1,166 @@
+"""
+Benchmark suite for patchdiff performance testing using pytest-benchmark.
+Run benchmarks:
+    uv run pytest benchmarks/benchmark.py --benchmark-only
+Save baseline:
+    uv run pytest benchmarks/benchmark.py --benchmark-only --benchmark-autosave
+Compare against baseline:
+    uv run pytest benchmarks/benchmark.py --benchmark-only --benchmark-compare=0001
+Fail if performance degrades >5%:
+    uv run pytest benchmarks/benchmark.py --benchmark-only --benchmark-compare=0001 --benchmark-compare-fail=mean:5%
+"""
+import random
+import pytest
+from patchdiff import apply, diff
+# Set seed for reproducibility
+random.seed(42)
+def generate_random_list(size: int, value_range: int = 1000) -> list[int]:
+    """Generate a random list of integers.
+    Args:
+        size: Number of elements to generate
+        value_range: Inclusive upper bound of each value (the lower bound is 0)
+    """
+    return [random.randint(0, value_range) for _ in range(size)]
+def generate_similar_lists(
+    size: int, change_ratio: float = 0.1
+) -> tuple[list[int], list[int]]:
+    """
+    Generate two similar lists with specified change ratio.
+    Args:
+        size: Size of the lists
+        change_ratio: Ratio of elements that differ (0.0 to 1.0)
+    Returns:
+        (list_a, list_b), where list_b is a copy of list_a after random
+        replacements, insertions and deletions
+    """
+    list_a = generate_random_list(size)
+    list_b = list_a.copy()
+    # Edit budget; each of the three edit kinds below performs a third of it
+    # (integer division), so the total may be smaller than num_changes
+    num_changes = int(size * change_ratio)
+    # Make some replacements
+    for _ in range(num_changes // 3):
+        # Indices are drawn independently, so the same position can be hit twice
+        idx = random.randint(0, size - 1)
+        list_b[idx] = random.randint(0, 1000)
+    # Make some insertions
+    for _ in range(num_changes // 3):
+        # An index equal to len(list_b) inserts at the end
+        idx = random.randint(0, len(list_b))
+        list_b.insert(idx, random.randint(0, 1000))
+    # Make some deletions
+    for _ in range(num_changes // 3):
+        if list_b:
+            idx = random.randint(0, len(list_b) - 1)
+            del list_b[idx]
+    return list_a, list_b
+def generate_nested_dict(depth: int, breadth: int) -> dict | int:
+    """Generate a nested dictionary structure.
+    Args:
+        depth: Remaining nesting levels; 0 yields a random integer instead of a dict
+        breadth: Number of keys ("key_0", "key_1", ...) in each dict
+    """
+    if depth == 0:
+        return random.randint(0, 1000)
+    result = {}
+    for i in range(breadth):
+        key = f"key_{i}"
+        # Draws above 0.3 recurse one level deeper; the rest become integer leaves
+        if random.random() > 0.3:
+            result[key] = generate_nested_dict(depth - 1, breadth)
+        else:
+            result[key] = random.randint(0, 1000)
+    return result
+# ========================================
+# List Diff Benchmarks
+# ========================================
+@pytest.mark.benchmark(group="list-diff")
+def test_list_diff_small_10pct(benchmark):
+    """Benchmark: 50 element list with 10% changes."""
+    # With size 50 and ratio 0.1 the generator performs 5 // 3 = 1 edit of each kind
+    a, b = generate_similar_lists(50, 0.1)
+    # Inputs are built above; the benchmark fixture is handed diff and its arguments
+    benchmark(diff, a, b)
+@pytest.mark.benchmark(group="list-diff")
+@pytest.mark.parametrize("change_ratio", [0.05, 0.1, 0.5])
+def test_list_diff_medium(benchmark, change_ratio):
+    """Benchmark: 1000 element list with varying change ratios."""
+    # One benchmark case is produced per change_ratio value
+    a, b = generate_similar_lists(1000, change_ratio)
+    benchmark(diff, a, b)
+@pytest.mark.benchmark(group="list-diff-edge")
+def test_list_diff_completely_different(benchmark):
+    """Benchmark: Two completely different 1000 element lists."""
+    # The lists are generated independently; individual values may still coincide
+    a = generate_random_list(1000)
+    b = generate_random_list(1000)
+    benchmark(diff, a, b)
+@pytest.mark.benchmark(group="list-diff-edge")
+def test_list_diff_identical(benchmark):
+    """Benchmark: Two identical 10000 element lists."""
+    a = generate_random_list(10000)
+    # b is an equal but distinct list object
+    b = a.copy()
+    benchmark(diff, a, b)
+# ========================================
+# Dict Diff Benchmarks
+# ========================================
+@pytest.mark.benchmark(group="dict-diff")
+def test_dict_diff_flat_500_keys(benchmark):
+    """Benchmark: Flat dict with 500 keys, 10% changed."""
+    a = {f"key_{i}": i for i in range(500)}
+    b = a.copy()
+    # Change 10%
+    # Both dicts keep the same keys; only the values of the first 50 differ
+    for i in range(50):
+        b[f"key_{i}"] = i + 500
+    benchmark(diff, a, b)
+@pytest.mark.benchmark(group="dict-diff")
+def test_dict_diff_nested(benchmark):
+    """Benchmark: Nested dict with depth=3, breadth=5."""
+    # a and b are generated independently rather than derived from one another
+    a = generate_nested_dict(3, 5)
+    b = generate_nested_dict(3, 5)
+    benchmark(diff, a, b)
+# ========================================
+# Mixed Structure Benchmarks
+# ========================================
+@pytest.mark.benchmark(group="mixed")
+def test_mixed_dict_with_list_values(benchmark):
+    """Benchmark: Dict with 50 keys, each containing a 100-element list."""
+    # Same keys on both sides; every list value is generated independently
+    a = {f"key_{i}": generate_random_list(100) for i in range(50)}
+    b = {f"key_{i}": generate_random_list(100) for i in range(50)}
+    benchmark(diff, a, b)
+# ========================================
+# Apply Benchmarks
+# ========================================
+@pytest.mark.benchmark(group="apply")
+def test_apply_list_1000_elements(benchmark):
+    """Benchmark: Apply patch to 1000 element list with 10% changes."""
+    a, b = generate_similar_lists(1000, 0.1)
+    # The diff is computed up front; only the forward ops are kept
+    ops, _ = diff(a, b)
+    # The benchmark fixture is handed apply with the original list and the ops
+    benchmark(apply, a, ops)

patchdiff-0.3.6/patchdiff/diff.py ADDED Viewed

@@ -0,0 +1,187 @@
+from __future__ import annotations
+from typing import Dict, List, Set, Tuple
+from .pointer import Pointer
+from .types import Diffable
+def diff_lists(input: List, output: List, ptr: Pointer) -> Tuple[List, List]:
+    """Diff two lists and return (ops, rops) with paths rooted at ptr.
+    Builds an edit-distance table in which a remove, add or replace each cost 1
+    and equal elements cost 0, traces it back from the bottom-right cell to
+    collect index-based operations, then turns those into path-based
+    operations. Replaced elements are diffed recursively through diff().
+    Args:
+        input: The original list
+        output: The target list
+        ptr: Pointer to the list; index tokens are appended to it
+    Returns:
+        A tuple (ops, rops): ops are derived for input -> output, rops for
+        output -> input
+    """
+    m, n = len(input), len(output)
+    # Build DP table bottom-up (iterative approach)
+    # dp[i][j] = cost of transforming input[0:i] to output[0:j]
+    # The table holds (m + 1) x (n + 1) entries
+    dp = [[0] * (n + 1) for _ in range(m + 1)]
+    # Initialize base cases
+    for i in range(1, m + 1):
+        dp[i][0] = i  # Cost of deleting all elements
+    for j in range(1, n + 1):
+        dp[0][j] = j  # Cost of adding all elements
+    # Fill DP table
+    for i in range(1, m + 1):
+        for j in range(1, n + 1):
+            if input[i - 1] == output[j - 1]:
+                # Elements match, no operation needed
+                dp[i][j] = dp[i - 1][j - 1]
+            else:
+                # Take minimum of three operations
+                dp[i][j] = min(
+                    dp[i - 1][j] + 1,  # Remove from input
+                    dp[i][j - 1] + 1,  # Add from output
+                    dp[i - 1][j - 1] + 1,  # Replace
+                )
+    # Traceback to extract operations
+    # ops use input indices, rops use output indices; both lists are collected
+    # back to front. Branch order decides ties: match, remove, add, replace.
+    ops = []
+    rops = []
+    i, j = m, n
+    while i > 0 or j > 0:
+        if i > 0 and j > 0 and input[i - 1] == output[j - 1]:
+            # Elements match, no operation
+            i -= 1
+            j -= 1
+        elif i > 0 and (j == 0 or dp[i][j] == dp[i - 1][j] + 1):
+            # Remove from input
+            ops.append({"op": "remove", "idx": i - 1})
+            rops.append({"op": "add", "idx": j - 1, "value": input[i - 1]})
+            i -= 1
+        elif j > 0 and (i == 0 or dp[i][j] == dp[i][j - 1] + 1):
+            # Add from output
+            # idx is the position the new value follows; the padding pass adds 1
+            ops.append({"op": "add", "idx": i - 1, "value": output[j - 1]})
+            rops.append({"op": "remove", "idx": j - 1})
+            j -= 1
+        else:
+            # Replace
+            ops.append(
+                {
+                    "op": "replace",
+                    "idx": i - 1,
+                    "original": input[i - 1],
+                    "value": output[j - 1],
+                }
+            )
+            rops.append(
+                {
+                    "op": "replace",
+                    "idx": j - 1,
+                    "original": output[j - 1],
+                    "value": input[i - 1],
+                }
+            )
+            i -= 1
+            j -= 1
+    # Apply padding to operations (using explicit loops instead of reduce)
+    # padding is the net index shift caused by the adds (+1) and removes (-1)
+    # already emitted
+    padded_ops = []
+    padding = 0
+    # Iterate in reverse to get correct order (traceback extracts operations backwards)
+    for op in reversed(ops):
+        if op["op"] == "add":
+            padded_idx = op["idx"] + 1 + padding
+            # "-" is used when the insert position is at or past the current end
+            idx_token = padded_idx if padded_idx < len(input) + padding else "-"
+            padded_ops.append(
+                {
+                    "op": "add",
+                    "path": ptr.append(idx_token),
+                    "value": op["value"],
+                }
+            )
+            padding += 1
+        elif op["op"] == "remove":
+            padded_ops.append(
+                {
+                    "op": "remove",
+                    "path": ptr.append(op["idx"] + padding),
+                }
+            )
+            padding -= 1
+        else:  # replace
+            # A replace leaves the length unchanged; the nested diff supplies
+            # the emitted ops and its reverse ops are discarded
+            replace_ptr = ptr.append(op["idx"] + padding)
+            replace_ops, _ = diff(op["original"], op["value"], replace_ptr)
+            padded_ops.extend(replace_ops)
+    # Same padding pass for the reverse direction, measured against output
+    padded_rops = []
+    padding = 0
+    # Iterate in reverse to get correct order (traceback extracts operations backwards)
+    for op in reversed(rops):
+        if op["op"] == "add":
+            padded_idx = op["idx"] + 1 + padding
+            idx_token = padded_idx if padded_idx < len(output) + padding else "-"
+            padded_rops.append(
+                {
+                    "op": "add",
+                    "path": ptr.append(idx_token),
+                    "value": op["value"],
+                }
+            )
+            padding += 1
+        elif op["op"] == "remove":
+            padded_rops.append(
+                {
+                    "op": "remove",
+                    "path": ptr.append(op["idx"] + padding),
+                }
+            )
+            padding -= 1
+        else:  # replace
+            # "original" holds the output element here, so this nested diff
+            # runs from the output element to the input element
+            replace_ptr = ptr.append(op["idx"] + padding)
+            replace_ops, _ = diff(op["original"], op["value"], replace_ptr)
+            padded_rops.extend(replace_ops)
+    return padded_ops, padded_rops
+def diff_dicts(input: Dict, output: Dict, ptr: Pointer) -> Tuple[List, List]:
+    """Diff two dicts and return (ops, rops) with paths rooted at ptr.
+    Keys only in input are removed, keys only in output are added, and keys
+    present in both are diffed recursively through diff().
+    Args:
+        input: The original dict
+        output: The target dict
+        ptr: Pointer to the dict; keys are appended to it as tokens
+    Returns:
+        A tuple (ops, rops); rops is built front-first, so its entries end up
+        in the opposite order to the matching entries in ops
+    """
+    ops, rops = [], []
+    input_keys = set(input.keys())
+    output_keys = set(output.keys())
+    # Keys are visited in set iteration order
+    for key in input_keys - output_keys:
+        ops.append({"op": "remove", "path": ptr.append(key)})
+        rops.insert(0, {"op": "add", "path": ptr.append(key), "value": input[key]})
+    for key in output_keys - input_keys:
+        ops.append(
+            {
+                "op": "add",
+                "path": ptr.append(key),
+                "value": output[key],
+            }
+        )
+        rops.insert(0, {"op": "remove", "path": ptr.append(key)})
+    for key in input_keys & output_keys:
+        key_ops, key_rops = diff(input[key], output[key], ptr.append(key))
+        ops.extend(key_ops)
+        # Put this key's reverse ops in front of the ones gathered so far
+        key_rops.extend(rops)
+        rops = key_rops
+    return ops, rops
+def diff_sets(input: Set, output: Set, ptr: Pointer) -> Tuple[List, List]:
+    """Diff two sets and return (ops, rops) with paths rooted at ptr.
+    Removals address an element by using the element itself as the path token;
+    additions use the "-" token and carry the element as their value.
+    Args:
+        input: The original set
+        output: The target set
+        ptr: Pointer to the set
+    Returns:
+        A tuple (ops, rops); each reverse op is inserted at the front of rops
+    """
+    ops, rops = [], []
+    for value in input - output:
+        ops.append({"op": "remove", "path": ptr.append(value)})
+        rops.insert(0, {"op": "add", "path": ptr.append("-"), "value": value})
+    for value in output - input:
+        ops.append({"op": "add", "path": ptr.append("-"), "value": value})
+        rops.insert(0, {"op": "remove", "path": ptr.append(value)})
+    return ops, rops
+def diff(
+    input: Diffable, output: Diffable, ptr: Pointer | None = None
+) -> Tuple[List, List]:
+    """Compute the operations between input and output, in both directions.
+    Args:
+        input: The original value
+        output: The target value
+        ptr: Pointer to the value being diffed; a new Pointer() is used when omitted
+    Returns:
+        A tuple (ops, rops) of operation lists; both are empty when input == output
+    """
+    if input == output:
+        return [], []
+    if ptr is None:
+        ptr = Pointer()
+    # Container kinds are detected by attribute; both sides must match for the
+    # specialised diff to be used
+    if hasattr(input, "append") and hasattr(output, "append"):  # list
+        return diff_lists(input, output, ptr)
+    if hasattr(input, "keys") and hasattr(output, "keys"):  # dict
+        return diff_dicts(input, output, ptr)
+    if hasattr(input, "add") and hasattr(output, "add"):  # set
+        return diff_sets(input, output, ptr)
+    # Anything else, including mismatched container kinds, is replaced as a whole
+    return [{"op": "replace", "path": ptr, "value": output}], [
+        {"op": "replace", "path": ptr, "value": input}
+    ]

{patchdiff-0.3.5 → patchdiff-0.3.6}/patchdiff/pointer.py RENAMED Viewed

@@ -1,7 +1,7 @@
 from __future__ import annotations
 import re
-from typing import Any, Hashable, List, Tuple
+from typing import Any, Hashable, Iterable, Tuple
 from .types import Diffable
@@ -20,7 +20,7 @@ def escape(token: str) -> str:
 class Pointer:
-    def __init__(self, tokens: List[Hashable] | None = None) -> None:
+    def __init__(self, tokens: Iterable[Hashable] | None = None) -> None:
         if tokens is None:
             tokens = []
         self.tokens = tuple(tokens)

{patchdiff-0.3.5 → patchdiff-0.3.6}/pyproject.toml RENAMED Viewed

@@ -1,6 +1,6 @@
 [project]
 name = "patchdiff"
-version = "0.3.5"
+version = "0.3.6"
 description = "MIT"
 authors = [
     { name = "Korijn van Golen", email = "korijn@gmail.com" },
@@ -18,6 +18,7 @@ dev = [
     "pytest",
     "pytest-cov",
     "pytest-watch",
+    "pytest-benchmark",
 ]
 [tool.ruff.lint]

patchdiff-0.3.5/patchdiff/diff.py DELETED Viewed

@@ -1,141 +0,0 @@
-from functools import partial, reduce
-from typing import Dict, List, Set, Tuple
-from .pointer import Pointer
-from .types import Diffable
-def diff_lists(input: List, output: List, ptr: Pointer) -> Tuple[List, List]:
-    memory = {(0, 0): {"ops": [], "rops": [], "cost": 0}}
-    def dist(i, j):
-        if (i, j) not in memory:
-            if i > 0 and j > 0 and input[i - 1] == output[j - 1]:
-                step = dist(i - 1, j - 1)
-            else:
-                paths = []
-                if i > 0:
-                    base = dist(i - 1, j)
-                    op = {"op": "remove", "idx": i - 1}
-                    rop = {"op": "add", "idx": j - 1, "value": input[i - 1]}
-                    paths.append(
-                        {
-                            "ops": base["ops"] + [op],
-                            "rops": base["rops"] + [rop],
-                            "cost": base["cost"] + 1,
-                        }
-                    )
-                if j > 0:
-                    base = dist(i, j - 1)
-                    op = {"op": "add", "idx": i - 1, "value": output[j - 1]}
-                    rop = {"op": "remove", "idx": j - 1}
-                    paths.append(
-                        {
-                            "ops": base["ops"] + [op],
-                            "rops": base["rops"] + [rop],
-                            "cost": base["cost"] + 1,
-                        }
-                    )
-                if i > 0 and j > 0:
-                    base = dist(i - 1, j - 1)
-                    op = {
-                        "op": "replace",
-                        "idx": i - 1,
-                        "original": input[i - 1],
-                        "value": output[j - 1],
-                    }
-                    rop = {
-                        "op": "replace",
-                        "idx": j - 1,
-                        "original": output[j - 1],
-                        "value": input[i - 1],
-                    }
-                    paths.append(
-                        {
-                            "ops": base["ops"] + [op],
-                            "rops": base["rops"] + [rop],
-                            "cost": base["cost"] + 1,
-                        }
-                    )
-                step = min(paths, key=lambda a: a["cost"])
-            memory[(i, j)] = step
-        return memory[(i, j)]
-    def pad(state, op, target=None):
-        ops, padding = state
-        if op["op"] == "add":
-            padded_idx = op["idx"] + 1 + padding
-            idx_token = padded_idx if padded_idx < len(target) + padding else "-"
-            full_op = {
-                "op": "add",
-                "path": ptr.append(idx_token),
-                "value": op["value"],
-            }
-            return [[*ops, full_op], padding + 1]
-        elif op["op"] == "remove":
-            full_op = {
-                "op": "remove",
-                "path": ptr.append(op["idx"] + padding),
-            }
-            return [[*ops, full_op], padding - 1]
-        else:
-            replace_ptr = ptr.append(op["idx"] + padding)
-            replace_ops, _ = diff(op["original"], op["value"], replace_ptr)
-            return [ops + replace_ops, padding]
-    solution = dist(len(input), len(output))
-    padded_ops, _ = reduce(partial(pad, target=input), solution["ops"], [[], 0])
-    padded_rops, _ = reduce(partial(pad, target=output), solution["rops"], [[], 0])
-    return padded_ops, padded_rops
-def diff_dicts(input: Dict, output: Dict, ptr: Pointer) -> Tuple[List, List]:
-    ops, rops = [], []
-    input_keys = set(input.keys())
-    output_keys = set(output.keys())
-    for key in input_keys - output_keys:
-        ops.append({"op": "remove", "path": ptr.append(key)})
-        rops.insert(0, {"op": "add", "path": ptr.append(key), "value": input[key]})
-    for key in output_keys - input_keys:
-        ops.append(
-            {
-                "op": "add",
-                "path": ptr.append(key),
-                "value": output[key],
-            }
-        )
-        rops.insert(0, {"op": "remove", "path": ptr.append(key)})
-    for key in input_keys & output_keys:
-        key_ops, key_rops = diff(input[key], output[key], ptr.append(key))
-        ops.extend(key_ops)
-        key_rops.extend(rops)
-        rops = key_rops
-    return ops, rops
-def diff_sets(input: Set, output: Set, ptr: Pointer) -> Tuple[List, List]:
-    ops, rops = [], []
-    for value in input - output:
-        ops.append({"op": "remove", "path": ptr.append(value)})
-        rops.insert(0, {"op": "add", "path": ptr.append("-"), "value": value})
-    for value in output - input:
-        ops.append({"op": "add", "path": ptr.append("-"), "value": value})
-        rops.insert(0, {"op": "remove", "path": ptr.append(value)})
-    return ops, rops
-def diff(input: Diffable, output: Diffable, ptr: Pointer = None) -> Tuple[List, List]:
-    if input == output:
-        return [], []
-    if ptr is None:
-        ptr = Pointer()
-    if hasattr(input, "append") and hasattr(output, "append"):  # list
-        return diff_lists(input, output, ptr)
-    if hasattr(input, "keys") and hasattr(output, "keys"):  # dict
-        return diff_dicts(input, output, ptr)
-    if hasattr(input, "add") and hasattr(output, "add"):  # set
-        return diff_sets(input, output, ptr)
-    return [{"op": "replace", "path": ptr, "value": output}], [
-        {"op": "replace", "path": ptr, "value": input}
-    ]