patchdiff 0.3.5__tar.gz → 0.3.6__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,69 @@
1
name: Benchmarks

on:
  push:
    branches:
      - master
  pull_request:
    branches:
      - master

jobs:
  benchmark:
    name: Benchmarks
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v5

      - name: Install uv
        uses: astral-sh/setup-uv@v6

      - name: Set up Python 3.14
        uses: actions/setup-python@v5
        with:
          python-version: '3.14'

      - name: Install dependencies
        run: uv sync

      # On PRs: restore the most recent baseline saved from master.
      # Cache entries are immutable, so master saves each baseline under a
      # unique per-commit key (see "Save benchmark baseline"). The exact key
      # below is not expected to hit; the restore-keys prefix selects the most
      # recently created baseline instead.
      - name: Restore benchmark baseline
        if: github.event_name == 'pull_request'
        uses: actions/cache/restore@v4
        with:
          path: .benchmarks
          key: benchmark-baseline-3.14-${{ runner.os }}-${{ github.sha }}
          restore-keys: |
            benchmark-baseline-3.14-${{ runner.os }}-

      # On master: save baseline results
      - name: Run benchmarks and save baseline
        id: run_benchmarks
        if: github.ref == 'refs/heads/master'
        continue-on-error: true
        run: |
          uv run --no-sync pytest benchmarks/benchmark.py \
            --benchmark-only \
            --benchmark-autosave \
            --benchmark-sort=name

      # On master: cache the new baseline results.
      # The key includes the commit SHA because an existing cache key cannot
      # be overwritten; a static key would freeze the baseline at the first
      # run. Skipped when the benchmark run itself failed (outcome is the
      # result before continue-on-error is applied).
      - name: Save benchmark baseline
        if: github.ref == 'refs/heads/master' && steps.run_benchmarks.outcome == 'success'
        uses: actions/cache/save@v4
        with:
          path: .benchmarks
          key: benchmark-baseline-3.14-${{ runner.os }}-${{ github.sha }}

      # On PRs: compare against baseline and fail if degraded
      - name: Run benchmarks and compare
        if: github.event_name == 'pull_request'
        run: |
          if [ -z "$(uv run --no-sync pytest-benchmark list)" ]; then
            echo "No baseline found, not comparing"
            uv run --no-sync pytest -v benchmarks/benchmark.py
            exit
          fi

          uv run --no-sync pytest benchmarks/benchmark.py \
            --benchmark-only \
            --benchmark-compare \
            --benchmark-compare-fail=mean:5% \
            --benchmark-sort=name
69
+
@@ -4,3 +4,4 @@ __pycache__
4
4
  .coverage
5
5
  dist
6
6
  uv.lock
7
+ .benchmarks/
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: patchdiff
3
- Version: 0.3.5
3
+ Version: 0.3.6
4
4
  Summary: MIT
5
5
  Project-URL: Homepage, https://github.com/fork-tongue/patchdiff
6
6
  Author-email: Korijn van Golen <korijn@gmail.com>, Berend Klein Haneveld <berendkleinhaneveld@gmail.com>
@@ -0,0 +1,166 @@
1
+ """
2
+ Benchmark suite for patchdiff performance testing using pytest-benchmark.
3
+
4
+ Run benchmarks:
5
+ uv run pytest benchmarks/benchmark.py --benchmark-only
6
+
7
+ Save baseline:
8
+ uv run pytest benchmarks/benchmark.py --benchmark-only --benchmark-autosave
9
+
10
+ Compare against baseline:
11
+ uv run pytest benchmarks/benchmark.py --benchmark-only --benchmark-compare=0001
12
+
13
+ Fail if performance degrades >5%:
14
+ uv run pytest benchmarks/benchmark.py --benchmark-only --benchmark-compare=0001 --benchmark-compare-fail=mean:5%
15
+ """
16
+
17
+ import random
18
+
19
+ import pytest
20
+
21
+ from patchdiff import apply, diff
22
+
23
+ # Set seed for reproducibility
24
+ random.seed(42)
25
+
26
+
27
def generate_random_list(size: int, value_range: int = 1000) -> list[int]:
    """Return ``size`` random integers, each in ``[0, value_range]`` inclusive.

    Values are drawn from the module-level ``random`` generator, so the
    result depends on that generator's current state.
    """
    values: list[int] = []
    for _ in range(size):
        values.append(random.randint(0, value_range))
    return values
30
+
31
+
32
def generate_similar_lists(
    size: int, change_ratio: float = 0.1, value_range: int = 1000
) -> tuple[list[int], list[int]]:
    """
    Generate a random list and an edited copy of it.

    The edit budget ``int(size * change_ratio)`` is split evenly (integer
    division by three) over replacements, insertions and deletions, applied
    in that order. Edits pick positions independently, so they may overlap
    or replace a value with itself; the number of elements that actually
    differ can therefore be lower than the budget.

    Args:
        size: Length of the base list.
        change_ratio: Fraction of ``size`` used as the edit budget (0.0 to 1.0).
        value_range: Inclusive upper bound for generated values; used for the
            base list as well as for replacement and inserted values.

    Returns:
        A ``(list_a, list_b)`` tuple: the base list and its edited copy.
    """
    list_a = generate_random_list(size, value_range)
    list_b = list_a.copy()

    edits_per_kind = int(size * change_ratio) // 3

    # Replacements: list_b still has exactly `size` elements here.
    for _ in range(edits_per_kind):
        idx = random.randint(0, size - 1)
        list_b[idx] = random.randint(0, value_range)

    # Insertions: len(list_b) is a valid position (append at the end).
    for _ in range(edits_per_kind):
        idx = random.randint(0, len(list_b))
        list_b.insert(idx, random.randint(0, value_range))

    # Deletions: guard against running out of elements.
    for _ in range(edits_per_kind):
        if list_b:
            idx = random.randint(0, len(list_b) - 1)
            del list_b[idx]

    return list_a, list_b
64
+
65
+
66
+ def generate_nested_dict(depth: int, breadth: int) -> dict | int:
67
+ """Generate a nested dictionary structure."""
68
+ if depth == 0:
69
+ return random.randint(0, 1000)
70
+
71
+ result = {}
72
+ for i in range(breadth):
73
+ key = f"key_{i}"
74
+ if random.random() > 0.3:
75
+ result[key] = generate_nested_dict(depth - 1, breadth)
76
+ else:
77
+ result[key] = random.randint(0, 1000)
78
+ return result
79
+
80
+
81
+ # ========================================
82
+ # List Diff Benchmarks
83
+ # ========================================
84
+
85
+
86
@pytest.mark.benchmark(group="list-diff")
def test_list_diff_small_10pct(benchmark):
    """Time ``diff`` on a 50-element list pair built with a 0.1 change ratio."""
    before, after = generate_similar_lists(50, 0.1)
    benchmark(diff, before, after)
91
+
92
+
93
@pytest.mark.benchmark(group="list-diff")
@pytest.mark.parametrize("change_ratio", [0.05, 0.1, 0.5])
def test_list_diff_medium(benchmark, change_ratio):
    """Time ``diff`` on 1000-element list pairs, once per change ratio."""
    before, after = generate_similar_lists(1000, change_ratio)
    benchmark(diff, before, after)
99
+
100
+
101
@pytest.mark.benchmark(group="list-diff-edge")
def test_list_diff_completely_different(benchmark):
    """Time ``diff`` on two independently generated 1000-element lists."""
    # Generation order matters: both lists draw from the same random stream.
    first = generate_random_list(1000)
    second = generate_random_list(1000)
    benchmark(diff, first, second)
107
+
108
+
109
@pytest.mark.benchmark(group="list-diff-edge")
def test_list_diff_identical(benchmark):
    """Time ``diff`` on a 10000-element list and an equal copy of it."""
    # NOTE(review): if patchdiff.diff returns early for equal inputs, this
    # presumably measures the list equality check rather than the list
    # diffing itself - confirm that is the intent.
    original = generate_random_list(10000)
    duplicate = list(original)
    benchmark(diff, original, duplicate)
115
+
116
+
117
+ # ========================================
118
+ # Dict Diff Benchmarks
119
+ # ========================================
120
+
121
+
122
@pytest.mark.benchmark(group="dict-diff")
def test_dict_diff_flat_500_keys(benchmark):
    """Time ``diff`` on a flat 500-key dict where the first 50 values differ."""
    original = {f"key_{i}": i for i in range(500)}
    changed = dict(original)
    # Overwrite 10% of the values; the key set stays the same.
    changed.update({f"key_{i}": i + 500 for i in range(50)})

    benchmark(diff, original, changed)
132
+
133
+
134
@pytest.mark.benchmark(group="dict-diff")
def test_dict_diff_nested(benchmark):
    """Time ``diff`` on two independently generated nested dicts (depth 3, breadth 5)."""
    # Generation order matters: both structures draw from the same random stream.
    left = generate_nested_dict(depth=3, breadth=5)
    right = generate_nested_dict(depth=3, breadth=5)
    benchmark(diff, left, right)
140
+
141
+
142
+ # ========================================
143
+ # Mixed Structure Benchmarks
144
+ # ========================================
145
+
146
+
147
@pytest.mark.benchmark(group="mixed")
def test_mixed_dict_with_list_values(benchmark):
    """Time ``diff`` on two 50-key dicts whose values are random 100-element lists."""

    def make_dict():
        return {f"key_{i}": generate_random_list(100) for i in range(50)}

    # The first dict is fully generated before the second one starts.
    left = make_dict()
    right = make_dict()
    benchmark(diff, left, right)
153
+
154
+
155
+ # ========================================
156
+ # Apply Benchmarks
157
+ # ========================================
158
+
159
+
160
@pytest.mark.benchmark(group="apply")
def test_apply_list_1000_elements(benchmark):
    """Time ``apply`` with the forward ops of a 1000-element, 0.1-ratio list diff."""
    source, target = generate_similar_lists(1000, 0.1)
    # Only the forward operations are needed; the reverse ops are discarded.
    forward_ops, _reverse_ops = diff(source, target)

    benchmark(apply, source, forward_ops)
@@ -0,0 +1,187 @@
1
+ from __future__ import annotations
2
+
3
+ from typing import Dict, List, Set, Tuple
4
+
5
+ from .pointer import Pointer
6
+ from .types import Diffable
7
+
8
+
9
def _pad_list_ops(raw_ops: List, source_len: int, ptr: Pointer) -> List:
    """Turn traceback ops (end-to-start, source indices) into path-based ops.

    ``raw_ops`` is walked in reverse so ops come out front-to-back.
    ``padding`` tracks how far earlier adds/removes have shifted the
    positions of later elements. ``source_len`` is the length of the list
    the ops are applied to; an add at or past its end uses the ``"-"`` token.
    """
    padded = []
    padding = 0
    for op in reversed(raw_ops):
        kind = op["op"]
        if kind == "add":
            padded_idx = op["idx"] + 1 + padding
            idx_token = padded_idx if padded_idx < source_len + padding else "-"
            padded.append(
                {
                    "op": "add",
                    "path": ptr.append(idx_token),
                    "value": op["value"],
                }
            )
            padding += 1
        elif kind == "remove":
            padded.append(
                {
                    "op": "remove",
                    "path": ptr.append(op["idx"] + padding),
                }
            )
            padding -= 1
        else:  # replace: recurse so nested containers get fine-grained ops
            replace_ptr = ptr.append(op["idx"] + padding)
            replace_ops, _ = diff(op["original"], op["value"], replace_ptr)
            padded.extend(replace_ops)
    return padded


def diff_lists(input: List, output: List, ptr: Pointer) -> Tuple[List, List]:
    """Diff two lists with an edit-distance table.

    Args:
        input: The original list.
        output: The target list.
        ptr: Pointer to the list's location; ``ptr.append(token)`` is used to
            build the path of every emitted operation.

    Returns:
        ``(ops, rops)``: operations derived for ``input`` and the mirrored
        reverse operations derived for ``output``. Each is a list of dicts
        with ``"op"`` and ``"path"`` keys (plus ``"value"`` for adds and for
        whatever ``diff`` emits for replaced elements).
    """
    m, n = len(input), len(output)

    # dp[i][j] = cost of transforming input[0:i] to output[0:j]
    dp = [[0] * (n + 1) for _ in range(m + 1)]
    for i in range(1, m + 1):
        dp[i][0] = i  # Cost of deleting all elements
    for j in range(1, n + 1):
        dp[0][j] = j  # Cost of adding all elements

    for i in range(1, m + 1):
        for j in range(1, n + 1):
            if input[i - 1] == output[j - 1]:
                # Elements match, no operation needed
                dp[i][j] = dp[i - 1][j - 1]
            else:
                # Cheapest of remove / add / replace, each costing one step
                dp[i][j] = 1 + min(dp[i - 1][j], dp[i][j - 1], dp[i - 1][j - 1])

    # Traceback from the bottom-right corner. Ties are resolved in the order
    # remove, add, replace.
    ops = []
    rops = []
    i, j = m, n
    while i > 0 or j > 0:
        if i > 0 and j > 0 and input[i - 1] == output[j - 1]:
            # Elements match, no operation
            i -= 1
            j -= 1
        elif i > 0 and (j == 0 or dp[i][j] == dp[i - 1][j] + 1):
            # Remove from input
            ops.append({"op": "remove", "idx": i - 1})
            rops.append({"op": "add", "idx": j - 1, "value": input[i - 1]})
            i -= 1
        elif j > 0 and (i == 0 or dp[i][j] == dp[i][j - 1] + 1):
            # Add from output
            ops.append({"op": "add", "idx": i - 1, "value": output[j - 1]})
            rops.append({"op": "remove", "idx": j - 1})
            j -= 1
        else:
            # Replace
            ops.append(
                {
                    "op": "replace",
                    "idx": i - 1,
                    "original": input[i - 1],
                    "value": output[j - 1],
                }
            )
            rops.append(
                {
                    "op": "replace",
                    "idx": j - 1,
                    "original": output[j - 1],
                    "value": input[i - 1],
                }
            )
            i -= 1
            j -= 1

    # Forward ops index into `input`, reverse ops into `output`.
    padded_ops = _pad_list_ops(ops, m, ptr)
    padded_rops = _pad_list_ops(rops, n, ptr)
    return padded_ops, padded_rops
135
+
136
+
137
def diff_dicts(input: Dict, output: Dict, ptr: Pointer) -> Tuple[List, List]:
    """Diff two dicts key by key.

    Keys only in ``input`` become removes, keys only in ``output`` become
    adds, and keys present in both are diffed recursively via ``diff``.
    Reverse operations are kept in the opposite order of the forward ones.

    Returns:
        ``(ops, rops)``: forward operations and reverse operations.
    """
    input_keys = set(input.keys())
    output_keys = set(output.keys())

    ops: List = []
    undo: List = []  # reverse ops in forward order; flipped below

    for key in input_keys - output_keys:
        ops.append({"op": "remove", "path": ptr.append(key)})
        undo.append({"op": "add", "path": ptr.append(key), "value": input[key]})

    for key in output_keys - input_keys:
        ops.append({"op": "add", "path": ptr.append(key), "value": output[key]})
        undo.append({"op": "remove", "path": ptr.append(key)})

    rops = undo[::-1]

    for key in input_keys & output_keys:
        key_ops, key_rops = diff(input[key], output[key], ptr.append(key))
        ops += key_ops
        # Reverse ops of later keys go in front of everything collected so far.
        rops = key_rops + rops

    return ops, rops
159
+
160
+
161
def diff_sets(input: Set, output: Set, ptr: Pointer) -> Tuple[List, List]:
    """Diff two sets.

    Values only in ``input`` become removes addressed by the value itself;
    values only in ``output`` become adds addressed by the ``"-"`` token.
    Reverse operations are kept in the opposite order of the forward ones.

    Returns:
        ``(ops, rops)``: forward operations and reverse operations.
    """
    ops: List = []
    undo: List = []  # reverse ops in forward order; flipped on return

    for value in input - output:
        ops.append({"op": "remove", "path": ptr.append(value)})
        undo.append({"op": "add", "path": ptr.append("-"), "value": value})

    for value in output - input:
        ops.append({"op": "add", "path": ptr.append("-"), "value": value})
        undo.append({"op": "remove", "path": ptr.append(value)})

    return ops, undo[::-1]
170
+
171
+
172
def diff(
    input: Diffable, output: Diffable, ptr: Pointer | None = None
) -> Tuple[List, List]:
    """Compute forward and reverse operations between ``input`` and ``output``.

    Equal values yield two empty lists. Otherwise the container kind is
    detected by duck typing on both values (``append`` -> list, ``keys`` ->
    dict, ``add`` -> set) and delegated to the matching helper. Anything
    else is emitted as a single ``replace`` at ``ptr``.

    Args:
        input: The original value.
        output: The target value.
        ptr: Location of the values; a fresh ``Pointer()`` is used when omitted.

    Returns:
        ``(ops, rops)``: forward operations and reverse operations.
    """
    if input == output:
        return [], []

    location = Pointer() if ptr is None else ptr

    def both_have(attr):
        return hasattr(input, attr) and hasattr(output, attr)

    if both_have("append"):  # list
        return diff_lists(input, output, location)
    if both_have("keys"):  # dict
        return diff_dicts(input, output, location)
    if both_have("add"):  # set
        return diff_sets(input, output, location)

    forward = [{"op": "replace", "path": location, "value": output}]
    backward = [{"op": "replace", "path": location, "value": input}]
    return forward, backward
@@ -1,7 +1,7 @@
1
1
  from __future__ import annotations
2
2
 
3
3
  import re
4
- from typing import Any, Hashable, List, Tuple
4
+ from typing import Any, Hashable, Iterable, Tuple
5
5
 
6
6
  from .types import Diffable
7
7
 
@@ -20,7 +20,7 @@ def escape(token: str) -> str:
20
20
 
21
21
 
22
22
  class Pointer:
23
- def __init__(self, tokens: List[Hashable] | None = None) -> None:
23
+ def __init__(self, tokens: Iterable[Hashable] | None = None) -> None:
24
24
  if tokens is None:
25
25
  tokens = []
26
26
  self.tokens = tuple(tokens)
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "patchdiff"
3
- version = "0.3.5"
3
+ version = "0.3.6"
4
4
  description = "MIT"
5
5
  authors = [
6
6
  { name = "Korijn van Golen", email = "korijn@gmail.com" },
@@ -18,6 +18,7 @@ dev = [
18
18
  "pytest",
19
19
  "pytest-cov",
20
20
  "pytest-watch",
21
+ "pytest-benchmark",
21
22
  ]
22
23
 
23
24
  [tool.ruff.lint]
@@ -1,141 +0,0 @@
1
- from functools import partial, reduce
2
- from typing import Dict, List, Set, Tuple
3
-
4
- from .pointer import Pointer
5
- from .types import Diffable
6
-
7
-
8
- def diff_lists(input: List, output: List, ptr: Pointer) -> Tuple[List, List]:
9
- memory = {(0, 0): {"ops": [], "rops": [], "cost": 0}}
10
-
11
- def dist(i, j):
12
- if (i, j) not in memory:
13
- if i > 0 and j > 0 and input[i - 1] == output[j - 1]:
14
- step = dist(i - 1, j - 1)
15
- else:
16
- paths = []
17
- if i > 0:
18
- base = dist(i - 1, j)
19
- op = {"op": "remove", "idx": i - 1}
20
- rop = {"op": "add", "idx": j - 1, "value": input[i - 1]}
21
- paths.append(
22
- {
23
- "ops": base["ops"] + [op],
24
- "rops": base["rops"] + [rop],
25
- "cost": base["cost"] + 1,
26
- }
27
- )
28
- if j > 0:
29
- base = dist(i, j - 1)
30
- op = {"op": "add", "idx": i - 1, "value": output[j - 1]}
31
- rop = {"op": "remove", "idx": j - 1}
32
- paths.append(
33
- {
34
- "ops": base["ops"] + [op],
35
- "rops": base["rops"] + [rop],
36
- "cost": base["cost"] + 1,
37
- }
38
- )
39
- if i > 0 and j > 0:
40
- base = dist(i - 1, j - 1)
41
- op = {
42
- "op": "replace",
43
- "idx": i - 1,
44
- "original": input[i - 1],
45
- "value": output[j - 1],
46
- }
47
- rop = {
48
- "op": "replace",
49
- "idx": j - 1,
50
- "original": output[j - 1],
51
- "value": input[i - 1],
52
- }
53
- paths.append(
54
- {
55
- "ops": base["ops"] + [op],
56
- "rops": base["rops"] + [rop],
57
- "cost": base["cost"] + 1,
58
- }
59
- )
60
- step = min(paths, key=lambda a: a["cost"])
61
- memory[(i, j)] = step
62
- return memory[(i, j)]
63
-
64
- def pad(state, op, target=None):
65
- ops, padding = state
66
- if op["op"] == "add":
67
- padded_idx = op["idx"] + 1 + padding
68
- idx_token = padded_idx if padded_idx < len(target) + padding else "-"
69
- full_op = {
70
- "op": "add",
71
- "path": ptr.append(idx_token),
72
- "value": op["value"],
73
- }
74
- return [[*ops, full_op], padding + 1]
75
- elif op["op"] == "remove":
76
- full_op = {
77
- "op": "remove",
78
- "path": ptr.append(op["idx"] + padding),
79
- }
80
- return [[*ops, full_op], padding - 1]
81
- else:
82
- replace_ptr = ptr.append(op["idx"] + padding)
83
- replace_ops, _ = diff(op["original"], op["value"], replace_ptr)
84
- return [ops + replace_ops, padding]
85
-
86
- solution = dist(len(input), len(output))
87
- padded_ops, _ = reduce(partial(pad, target=input), solution["ops"], [[], 0])
88
- padded_rops, _ = reduce(partial(pad, target=output), solution["rops"], [[], 0])
89
-
90
- return padded_ops, padded_rops
91
-
92
-
93
- def diff_dicts(input: Dict, output: Dict, ptr: Pointer) -> Tuple[List, List]:
94
- ops, rops = [], []
95
- input_keys = set(input.keys())
96
- output_keys = set(output.keys())
97
- for key in input_keys - output_keys:
98
- ops.append({"op": "remove", "path": ptr.append(key)})
99
- rops.insert(0, {"op": "add", "path": ptr.append(key), "value": input[key]})
100
- for key in output_keys - input_keys:
101
- ops.append(
102
- {
103
- "op": "add",
104
- "path": ptr.append(key),
105
- "value": output[key],
106
- }
107
- )
108
- rops.insert(0, {"op": "remove", "path": ptr.append(key)})
109
- for key in input_keys & output_keys:
110
- key_ops, key_rops = diff(input[key], output[key], ptr.append(key))
111
- ops.extend(key_ops)
112
- key_rops.extend(rops)
113
- rops = key_rops
114
- return ops, rops
115
-
116
-
117
- def diff_sets(input: Set, output: Set, ptr: Pointer) -> Tuple[List, List]:
118
- ops, rops = [], []
119
- for value in input - output:
120
- ops.append({"op": "remove", "path": ptr.append(value)})
121
- rops.insert(0, {"op": "add", "path": ptr.append("-"), "value": value})
122
- for value in output - input:
123
- ops.append({"op": "add", "path": ptr.append("-"), "value": value})
124
- rops.insert(0, {"op": "remove", "path": ptr.append(value)})
125
- return ops, rops
126
-
127
-
128
- def diff(input: Diffable, output: Diffable, ptr: Pointer = None) -> Tuple[List, List]:
129
- if input == output:
130
- return [], []
131
- if ptr is None:
132
- ptr = Pointer()
133
- if hasattr(input, "append") and hasattr(output, "append"): # list
134
- return diff_lists(input, output, ptr)
135
- if hasattr(input, "keys") and hasattr(output, "keys"): # dict
136
- return diff_dicts(input, output, ptr)
137
- if hasattr(input, "add") and hasattr(output, "add"): # set
138
- return diff_sets(input, output, ptr)
139
- return [{"op": "replace", "path": ptr, "value": output}], [
140
- {"op": "replace", "path": ptr, "value": input}
141
- ]
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes