patchdiff 0.3.5__tar.gz → 0.3.7__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- patchdiff-0.3.7/.github/workflows/benchmark.yml +56 -0
- {patchdiff-0.3.5 → patchdiff-0.3.7}/.gitignore +2 -0
- {patchdiff-0.3.5 → patchdiff-0.3.7}/PKG-INFO +1 -1
- patchdiff-0.3.7/benchmarks/benchmark.py +232 -0
- {patchdiff-0.3.5 → patchdiff-0.3.7}/patchdiff/__init__.py +4 -2
- {patchdiff-0.3.5 → patchdiff-0.3.7}/patchdiff/apply.py +2 -0
- patchdiff-0.3.7/patchdiff/diff.py +194 -0
- {patchdiff-0.3.5 → patchdiff-0.3.7}/patchdiff/pointer.py +12 -13
- {patchdiff-0.3.5 → patchdiff-0.3.7}/pyproject.toml +2 -1
- patchdiff-0.3.5/patchdiff/diff.py +0 -141
- {patchdiff-0.3.5 → patchdiff-0.3.7}/.github/workflows/ci.yml +0 -0
- {patchdiff-0.3.5 → patchdiff-0.3.7}/README.md +0 -0
- {patchdiff-0.3.5 → patchdiff-0.3.7}/patchdiff/serialize.py +0 -0
- {patchdiff-0.3.5 → patchdiff-0.3.7}/patchdiff/types.py +0 -0
- {patchdiff-0.3.5 → patchdiff-0.3.7}/tests/test_apply.py +0 -0
- {patchdiff-0.3.5 → patchdiff-0.3.7}/tests/test_diff.py +0 -0
- {patchdiff-0.3.5 → patchdiff-0.3.7}/tests/test_pointer.py +0 -0
- {patchdiff-0.3.5 → patchdiff-0.3.7}/tests/test_proxy.py +0 -0
- {patchdiff-0.3.5 → patchdiff-0.3.7}/tests/test_serialize.py +0 -0
|
@@ -0,0 +1,56 @@
|
|
|
1
|
+
name: Benchmarks
|
|
2
|
+
|
|
3
|
+
on:
|
|
4
|
+
pull_request:
|
|
5
|
+
branches:
|
|
6
|
+
- master
|
|
7
|
+
|
|
8
|
+
jobs:
|
|
9
|
+
benchmark:
|
|
10
|
+
name: Benchmarks
|
|
11
|
+
runs-on: ubuntu-latest
|
|
12
|
+
steps:
|
|
13
|
+
- uses: actions/checkout@v5
|
|
14
|
+
|
|
15
|
+
- name: Install uv
|
|
16
|
+
uses: astral-sh/setup-uv@v6
|
|
17
|
+
|
|
18
|
+
- name: Set up Python 3.14
|
|
19
|
+
uses: actions/setup-python@v5
|
|
20
|
+
with:
|
|
21
|
+
python-version: "3.14"
|
|
22
|
+
|
|
23
|
+
- name: Install dependencies
|
|
24
|
+
run: uv sync
|
|
25
|
+
|
|
26
|
+
# On PRs: run benchmarks twice (PR code vs master code) and compare
|
|
27
|
+
- name: Run benchmarks
|
|
28
|
+
run: |
|
|
29
|
+
# Checkout master version of patchdiff directory
|
|
30
|
+
git fetch origin master
|
|
31
|
+
git checkout origin/master -- patchdiff/
|
|
32
|
+
|
|
33
|
+
# Run benchmarks with master code as baseline
|
|
34
|
+
uv run --no-sync pytest benchmarks/benchmark.py \
|
|
35
|
+
--benchmark-only \
|
|
36
|
+
--benchmark-save=master \
|
|
37
|
+
--benchmark-sort=mean || true
|
|
38
|
+
|
|
39
|
+
# Restore PR code
|
|
40
|
+
git checkout HEAD -- patchdiff/
|
|
41
|
+
|
|
42
|
+
# Run benchmarks on PR code and compare
|
|
43
|
+
uv run --no-sync pytest benchmarks/benchmark.py \
|
|
44
|
+
--benchmark-only \
|
|
45
|
+
--benchmark-compare \
|
|
46
|
+
--benchmark-compare-fail=mean:5% \
|
|
47
|
+
--benchmark-save=branch \
|
|
48
|
+
--benchmark-sort=mean
|
|
49
|
+
|
|
50
|
+
- name: Upload benchmarks
|
|
51
|
+
if: always()
|
|
52
|
+
uses: actions/upload-artifact@v4
|
|
53
|
+
with:
|
|
54
|
+
name: Benchmarks
|
|
55
|
+
path: .benchmarks/
|
|
56
|
+
include-hidden-files: true
|
|
@@ -0,0 +1,232 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Benchmark suite for patchdiff performance testing using pytest-benchmark.
|
|
3
|
+
|
|
4
|
+
Run benchmarks:
|
|
5
|
+
uv run pytest benchmarks/benchmark.py --benchmark-only
|
|
6
|
+
|
|
7
|
+
Save baseline:
|
|
8
|
+
uv run pytest benchmarks/benchmark.py --benchmark-only --benchmark-autosave
|
|
9
|
+
|
|
10
|
+
Compare against baseline:
|
|
11
|
+
uv run pytest benchmarks/benchmark.py --benchmark-only --benchmark-compare=0001
|
|
12
|
+
|
|
13
|
+
Fail if performance degrades >5%:
|
|
14
|
+
uv run pytest benchmarks/benchmark.py --benchmark-only --benchmark-compare=0001 --benchmark-compare-fail=mean:5%
|
|
15
|
+
"""
|
|
16
|
+
|
|
17
|
+
import random
|
|
18
|
+
|
|
19
|
+
import pytest
|
|
20
|
+
|
|
21
|
+
from patchdiff import apply, diff
|
|
22
|
+
from patchdiff.pointer import Pointer
|
|
23
|
+
|
|
24
|
+
# Set seed for reproducibility
|
|
25
|
+
random.seed(42)
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
def generate_random_list(size: int, value_range: int = 1000) -> list[int]:
|
|
29
|
+
"""Generate a random list of integers."""
|
|
30
|
+
return [random.randint(0, value_range) for _ in range(size)]
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
def generate_similar_lists(
|
|
34
|
+
size: int, change_ratio: float = 0.1
|
|
35
|
+
) -> tuple[list[int], list[int]]:
|
|
36
|
+
"""
|
|
37
|
+
Generate two similar lists with specified change ratio.
|
|
38
|
+
|
|
39
|
+
Args:
|
|
40
|
+
size: Size of the lists
|
|
41
|
+
change_ratio: Ratio of elements that differ (0.0 to 1.0)
|
|
42
|
+
"""
|
|
43
|
+
list_a = generate_random_list(size)
|
|
44
|
+
list_b = list_a.copy()
|
|
45
|
+
|
|
46
|
+
num_changes = int(size * change_ratio)
|
|
47
|
+
|
|
48
|
+
# Make some replacements
|
|
49
|
+
for _ in range(num_changes // 3):
|
|
50
|
+
idx = random.randint(0, size - 1)
|
|
51
|
+
list_b[idx] = random.randint(0, 1000)
|
|
52
|
+
|
|
53
|
+
# Make some insertions
|
|
54
|
+
for _ in range(num_changes // 3):
|
|
55
|
+
idx = random.randint(0, len(list_b))
|
|
56
|
+
list_b.insert(idx, random.randint(0, 1000))
|
|
57
|
+
|
|
58
|
+
# Make some deletions
|
|
59
|
+
for _ in range(num_changes // 3):
|
|
60
|
+
if list_b:
|
|
61
|
+
idx = random.randint(0, len(list_b) - 1)
|
|
62
|
+
del list_b[idx]
|
|
63
|
+
|
|
64
|
+
return list_a, list_b
|
|
65
|
+
|
|
66
|
+
|
|
67
|
+
def generate_nested_dict(depth: int, breadth: int) -> dict | int:
|
|
68
|
+
"""Generate a nested dictionary structure."""
|
|
69
|
+
if depth == 0:
|
|
70
|
+
return random.randint(0, 1000)
|
|
71
|
+
|
|
72
|
+
result = {}
|
|
73
|
+
for i in range(breadth):
|
|
74
|
+
key = f"key_{i}"
|
|
75
|
+
if random.random() > 0.3:
|
|
76
|
+
result[key] = generate_nested_dict(depth - 1, breadth)
|
|
77
|
+
else:
|
|
78
|
+
result[key] = random.randint(0, 1000)
|
|
79
|
+
return result
|
|
80
|
+
|
|
81
|
+
|
|
82
|
+
# ========================================
|
|
83
|
+
# List Diff Benchmarks
|
|
84
|
+
# ========================================
|
|
85
|
+
|
|
86
|
+
|
|
87
|
+
@pytest.mark.benchmark(group="list-diff")
|
|
88
|
+
def test_list_diff_small_10pct(benchmark):
|
|
89
|
+
"""Benchmark: 50 element list with 10% changes."""
|
|
90
|
+
a, b = generate_similar_lists(50, 0.1)
|
|
91
|
+
benchmark(diff, a, b)
|
|
92
|
+
|
|
93
|
+
|
|
94
|
+
@pytest.mark.benchmark(group="list-diff")
|
|
95
|
+
@pytest.mark.parametrize("change_ratio", [0.05, 0.1, 0.5])
|
|
96
|
+
def test_list_diff_medium(benchmark, change_ratio):
|
|
97
|
+
"""Benchmark: 1000 element list with varying change ratios."""
|
|
98
|
+
a, b = generate_similar_lists(1000, change_ratio)
|
|
99
|
+
benchmark(diff, a, b)
|
|
100
|
+
|
|
101
|
+
|
|
102
|
+
@pytest.mark.benchmark(group="list-diff-edge")
|
|
103
|
+
def test_list_diff_completely_different(benchmark):
|
|
104
|
+
"""Benchmark: Two completely different 1000 element lists."""
|
|
105
|
+
a = generate_random_list(1000)
|
|
106
|
+
b = generate_random_list(1000)
|
|
107
|
+
benchmark(diff, a, b)
|
|
108
|
+
|
|
109
|
+
|
|
110
|
+
@pytest.mark.benchmark(group="list-diff-edge")
|
|
111
|
+
def test_list_diff_identical(benchmark):
|
|
112
|
+
"""Benchmark: Two identical 10000 element lists."""
|
|
113
|
+
a = generate_random_list(10000)
|
|
114
|
+
b = a.copy()
|
|
115
|
+
benchmark(diff, a, b)
|
|
116
|
+
|
|
117
|
+
|
|
118
|
+
# ========================================
|
|
119
|
+
# Dict Diff Benchmarks
|
|
120
|
+
# ========================================
|
|
121
|
+
|
|
122
|
+
|
|
123
|
+
@pytest.mark.benchmark(group="dict-diff")
|
|
124
|
+
def test_dict_diff_flat_500_keys(benchmark):
|
|
125
|
+
"""Benchmark: Flat dict with 500 keys, 10% changed."""
|
|
126
|
+
a = {f"key_{i}": i for i in range(500)}
|
|
127
|
+
b = a.copy()
|
|
128
|
+
# Change 10%
|
|
129
|
+
for i in range(50):
|
|
130
|
+
b[f"key_{i}"] = i + 500
|
|
131
|
+
|
|
132
|
+
benchmark(diff, a, b)
|
|
133
|
+
|
|
134
|
+
|
|
135
|
+
@pytest.mark.benchmark(group="dict-diff")
|
|
136
|
+
def test_dict_diff_nested(benchmark):
|
|
137
|
+
"""Benchmark: Nested dict with depth=3, breadth=5."""
|
|
138
|
+
a = generate_nested_dict(3, 5)
|
|
139
|
+
b = generate_nested_dict(3, 5)
|
|
140
|
+
benchmark(diff, a, b)
|
|
141
|
+
|
|
142
|
+
|
|
143
|
+
# ========================================
|
|
144
|
+
# Set Diff Benchmarks
|
|
145
|
+
# ========================================
|
|
146
|
+
|
|
147
|
+
|
|
148
|
+
@pytest.mark.benchmark(group="set-diff")
|
|
149
|
+
def test_set_diff_1000_elements(benchmark):
|
|
150
|
+
"""Benchmark: Sets with 1000 elements, 10% difference."""
|
|
151
|
+
a = set(generate_random_list(1000, 2000))
|
|
152
|
+
b = a.copy()
|
|
153
|
+
# Remove 5%
|
|
154
|
+
a_list = list(a)
|
|
155
|
+
for i in range(50):
|
|
156
|
+
a.remove(a_list[i])
|
|
157
|
+
# Add 5%
|
|
158
|
+
for i in range(50):
|
|
159
|
+
b.add(2000 + i)
|
|
160
|
+
|
|
161
|
+
benchmark(diff, a, b)
|
|
162
|
+
|
|
163
|
+
|
|
164
|
+
# ========================================
|
|
165
|
+
# Mixed Structure Benchmarks
|
|
166
|
+
# ========================================
|
|
167
|
+
|
|
168
|
+
|
|
169
|
+
@pytest.mark.benchmark(group="mixed")
|
|
170
|
+
def test_mixed_dict_with_list_values(benchmark):
|
|
171
|
+
"""Benchmark: Dict with 50 keys, each containing a 100-element list."""
|
|
172
|
+
a = {f"key_{i}": generate_random_list(100) for i in range(50)}
|
|
173
|
+
b = {f"key_{i}": generate_random_list(100) for i in range(50)}
|
|
174
|
+
benchmark(diff, a, b)
|
|
175
|
+
|
|
176
|
+
|
|
177
|
+
# ========================================
|
|
178
|
+
# Apply Benchmarks
|
|
179
|
+
# ========================================
|
|
180
|
+
|
|
181
|
+
|
|
182
|
+
@pytest.mark.benchmark(group="apply")
|
|
183
|
+
def test_apply_list_1000_elements(benchmark):
|
|
184
|
+
"""Benchmark: Apply patch to 1000 element list with 10% changes."""
|
|
185
|
+
a, b = generate_similar_lists(1000, 0.1)
|
|
186
|
+
ops, _ = diff(a, b)
|
|
187
|
+
|
|
188
|
+
benchmark(apply, a, ops)
|
|
189
|
+
|
|
190
|
+
|
|
191
|
+
# ========================================
|
|
192
|
+
# Pointer Evaluate Benchmarks
|
|
193
|
+
# ========================================
|
|
194
|
+
|
|
195
|
+
|
|
196
|
+
@pytest.mark.benchmark(group="pointer-evaluate")
|
|
197
|
+
def test_pointer_evaluate_deep_dict(benchmark):
|
|
198
|
+
"""Benchmark: Evaluate pointer on deeply nested structure."""
|
|
199
|
+
depth = 100
|
|
200
|
+
obj = 42
|
|
201
|
+
for i in range(depth - 1, -1, -1):
|
|
202
|
+
obj = {f"key_{i}": obj}
|
|
203
|
+
ptr = Pointer([f"key_{i}" for i in range(depth)])
|
|
204
|
+
|
|
205
|
+
benchmark(ptr.evaluate, obj)
|
|
206
|
+
|
|
207
|
+
|
|
208
|
+
@pytest.mark.benchmark(group="pointer-evaluate")
|
|
209
|
+
def test_pointer_evaluate_deep_list(benchmark):
|
|
210
|
+
"""Benchmark: Evaluate pointer on deep lists."""
|
|
211
|
+
# Build nested lists 100 levels deep; innermost value is 42.
|
|
212
|
+
depth = 100
|
|
213
|
+
nested = 42
|
|
214
|
+
for _ in range(depth):
|
|
215
|
+
nested = [nested]
|
|
216
|
+
obj = nested
|
|
217
|
+
ptr = Pointer([0] * depth)
|
|
218
|
+
|
|
219
|
+
benchmark(ptr.evaluate, obj)
|
|
220
|
+
|
|
221
|
+
|
|
222
|
+
# ========================================
|
|
223
|
+
# Pointer Append Benchmarks
|
|
224
|
+
# ========================================
|
|
225
|
+
|
|
226
|
+
|
|
227
|
+
@pytest.mark.benchmark(group="pointer-append")
|
|
228
|
+
def test_pointer_append(benchmark):
|
|
229
|
+
"""Benchmark: Append token to pointer."""
|
|
230
|
+
ptr = Pointer.from_str("/a/b/c/d/e/f/g/h/i/j")
|
|
231
|
+
|
|
232
|
+
benchmark(ptr.append, "k")
|
|
@@ -0,0 +1,194 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from typing import Dict, List, Set, Tuple
|
|
4
|
+
|
|
5
|
+
from .pointer import Pointer
|
|
6
|
+
from .types import Diffable
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
def diff_lists(input: List, output: List, ptr: Pointer) -> Tuple[List, List]:
|
|
10
|
+
m, n = len(input), len(output)
|
|
11
|
+
|
|
12
|
+
# Build DP table bottom-up (iterative approach)
|
|
13
|
+
# dp[i][j] = cost of transforming input[0:i] to output[0:j]
|
|
14
|
+
dp = [[0] * (n + 1) for _ in range(m + 1)]
|
|
15
|
+
|
|
16
|
+
# Initialize base cases
|
|
17
|
+
for i in range(1, m + 1):
|
|
18
|
+
dp[i][0] = i # Cost of deleting all elements
|
|
19
|
+
for j in range(1, n + 1):
|
|
20
|
+
dp[0][j] = j # Cost of adding all elements
|
|
21
|
+
|
|
22
|
+
# Fill DP table
|
|
23
|
+
for i in range(1, m + 1):
|
|
24
|
+
for j in range(1, n + 1):
|
|
25
|
+
if input[i - 1] == output[j - 1]:
|
|
26
|
+
# Elements match, no operation needed
|
|
27
|
+
dp[i][j] = dp[i - 1][j - 1]
|
|
28
|
+
else:
|
|
29
|
+
# Take minimum of three operations
|
|
30
|
+
dp[i][j] = min(
|
|
31
|
+
dp[i - 1][j] + 1, # Remove from input
|
|
32
|
+
dp[i][j - 1] + 1, # Add from output
|
|
33
|
+
dp[i - 1][j - 1] + 1, # Replace
|
|
34
|
+
)
|
|
35
|
+
|
|
36
|
+
# Traceback to extract operations
|
|
37
|
+
ops = []
|
|
38
|
+
rops = []
|
|
39
|
+
i, j = m, n
|
|
40
|
+
|
|
41
|
+
while i > 0 or j > 0:
|
|
42
|
+
if i > 0 and j > 0 and input[i - 1] == output[j - 1]:
|
|
43
|
+
# Elements match, no operation
|
|
44
|
+
i -= 1
|
|
45
|
+
j -= 1
|
|
46
|
+
elif i > 0 and (j == 0 or dp[i][j] == dp[i - 1][j] + 1):
|
|
47
|
+
# Remove from input
|
|
48
|
+
ops.append({"op": "remove", "idx": i - 1})
|
|
49
|
+
rops.append({"op": "add", "idx": j - 1, "value": input[i - 1]})
|
|
50
|
+
i -= 1
|
|
51
|
+
elif j > 0 and (i == 0 or dp[i][j] == dp[i][j - 1] + 1):
|
|
52
|
+
# Add from output
|
|
53
|
+
ops.append({"op": "add", "idx": i - 1, "value": output[j - 1]})
|
|
54
|
+
rops.append({"op": "remove", "idx": j - 1})
|
|
55
|
+
j -= 1
|
|
56
|
+
else:
|
|
57
|
+
# Replace
|
|
58
|
+
ops.append(
|
|
59
|
+
{
|
|
60
|
+
"op": "replace",
|
|
61
|
+
"idx": i - 1,
|
|
62
|
+
"original": input[i - 1],
|
|
63
|
+
"value": output[j - 1],
|
|
64
|
+
}
|
|
65
|
+
)
|
|
66
|
+
rops.append(
|
|
67
|
+
{
|
|
68
|
+
"op": "replace",
|
|
69
|
+
"idx": j - 1,
|
|
70
|
+
"original": output[j - 1],
|
|
71
|
+
"value": input[i - 1],
|
|
72
|
+
}
|
|
73
|
+
)
|
|
74
|
+
i -= 1
|
|
75
|
+
j -= 1
|
|
76
|
+
|
|
77
|
+
# Apply padding to operations (using explicit loops instead of reduce)
|
|
78
|
+
padded_ops = []
|
|
79
|
+
padding = 0
|
|
80
|
+
# Iterate in reverse to get correct order (traceback extracts operations backwards)
|
|
81
|
+
for op in reversed(ops):
|
|
82
|
+
if op["op"] == "add":
|
|
83
|
+
padded_idx = op["idx"] + 1 + padding
|
|
84
|
+
idx_token = padded_idx if padded_idx < len(input) + padding else "-"
|
|
85
|
+
padded_ops.append(
|
|
86
|
+
{
|
|
87
|
+
"op": "add",
|
|
88
|
+
"path": ptr.append(idx_token),
|
|
89
|
+
"value": op["value"],
|
|
90
|
+
}
|
|
91
|
+
)
|
|
92
|
+
padding += 1
|
|
93
|
+
elif op["op"] == "remove":
|
|
94
|
+
padded_ops.append(
|
|
95
|
+
{
|
|
96
|
+
"op": "remove",
|
|
97
|
+
"path": ptr.append(op["idx"] + padding),
|
|
98
|
+
}
|
|
99
|
+
)
|
|
100
|
+
padding -= 1
|
|
101
|
+
else: # replace
|
|
102
|
+
replace_ptr = ptr.append(op["idx"] + padding)
|
|
103
|
+
replace_ops, _ = diff(op["original"], op["value"], replace_ptr)
|
|
104
|
+
padded_ops.extend(replace_ops)
|
|
105
|
+
|
|
106
|
+
padded_rops = []
|
|
107
|
+
padding = 0
|
|
108
|
+
# Iterate in reverse to get correct order (traceback extracts operations backwards)
|
|
109
|
+
for op in reversed(rops):
|
|
110
|
+
if op["op"] == "add":
|
|
111
|
+
padded_idx = op["idx"] + 1 + padding
|
|
112
|
+
idx_token = padded_idx if padded_idx < len(output) + padding else "-"
|
|
113
|
+
padded_rops.append(
|
|
114
|
+
{
|
|
115
|
+
"op": "add",
|
|
116
|
+
"path": ptr.append(idx_token),
|
|
117
|
+
"value": op["value"],
|
|
118
|
+
}
|
|
119
|
+
)
|
|
120
|
+
padding += 1
|
|
121
|
+
elif op["op"] == "remove":
|
|
122
|
+
padded_rops.append(
|
|
123
|
+
{
|
|
124
|
+
"op": "remove",
|
|
125
|
+
"path": ptr.append(op["idx"] + padding),
|
|
126
|
+
}
|
|
127
|
+
)
|
|
128
|
+
padding -= 1
|
|
129
|
+
else: # replace
|
|
130
|
+
replace_ptr = ptr.append(op["idx"] + padding)
|
|
131
|
+
replace_ops, _ = diff(op["original"], op["value"], replace_ptr)
|
|
132
|
+
padded_rops.extend(replace_ops)
|
|
133
|
+
|
|
134
|
+
return padded_ops, padded_rops
|
|
135
|
+
|
|
136
|
+
|
|
137
|
+
def diff_dicts(input: Dict, output: Dict, ptr: Pointer) -> Tuple[List, List]:
|
|
138
|
+
ops, rops = [], []
|
|
139
|
+
input_keys = set(input.keys()) if input else set()
|
|
140
|
+
output_keys = set(output.keys()) if output else set()
|
|
141
|
+
if input_only := input_keys - output_keys:
|
|
142
|
+
for key in input_only:
|
|
143
|
+
key_ptr = ptr.append(key)
|
|
144
|
+
ops.append({"op": "remove", "path": key_ptr})
|
|
145
|
+
rops.insert(0, {"op": "add", "path": key_ptr, "value": input[key]})
|
|
146
|
+
if output_only := output_keys - input_keys:
|
|
147
|
+
for key in output_only:
|
|
148
|
+
key_ptr = ptr.append(key)
|
|
149
|
+
ops.append(
|
|
150
|
+
{
|
|
151
|
+
"op": "add",
|
|
152
|
+
"path": key_ptr,
|
|
153
|
+
"value": output[key],
|
|
154
|
+
}
|
|
155
|
+
)
|
|
156
|
+
rops.insert(0, {"op": "remove", "path": key_ptr})
|
|
157
|
+
if common := input_keys & output_keys:
|
|
158
|
+
for key in common:
|
|
159
|
+
key_ops, key_rops = diff(input[key], output[key], ptr.append(key))
|
|
160
|
+
ops.extend(key_ops)
|
|
161
|
+
key_rops.extend(rops)
|
|
162
|
+
rops = key_rops
|
|
163
|
+
return ops, rops
|
|
164
|
+
|
|
165
|
+
|
|
166
|
+
def diff_sets(input: Set, output: Set, ptr: Pointer) -> Tuple[List, List]:
|
|
167
|
+
ops, rops = [], []
|
|
168
|
+
if input_only := input - output:
|
|
169
|
+
for value in input_only:
|
|
170
|
+
ops.append({"op": "remove", "path": ptr.append(value)})
|
|
171
|
+
rops.insert(0, {"op": "add", "path": ptr.append("-"), "value": value})
|
|
172
|
+
if output_only := output - input:
|
|
173
|
+
for value in output_only:
|
|
174
|
+
ops.append({"op": "add", "path": ptr.append("-"), "value": value})
|
|
175
|
+
rops.insert(0, {"op": "remove", "path": ptr.append(value)})
|
|
176
|
+
return ops, rops
|
|
177
|
+
|
|
178
|
+
|
|
179
|
+
def diff(
|
|
180
|
+
input: Diffable, output: Diffable, ptr: Pointer | None = None
|
|
181
|
+
) -> Tuple[List, List]:
|
|
182
|
+
if input == output:
|
|
183
|
+
return [], []
|
|
184
|
+
if ptr is None:
|
|
185
|
+
ptr = Pointer()
|
|
186
|
+
if hasattr(input, "append") and hasattr(output, "append"): # list
|
|
187
|
+
return diff_lists(input, output, ptr)
|
|
188
|
+
if hasattr(input, "keys") and hasattr(output, "keys"): # dict
|
|
189
|
+
return diff_dicts(input, output, ptr)
|
|
190
|
+
if hasattr(input, "add") and hasattr(output, "add"): # set
|
|
191
|
+
return diff_sets(input, output, ptr)
|
|
192
|
+
return [{"op": "replace", "path": ptr, "value": output}], [
|
|
193
|
+
{"op": "replace", "path": ptr, "value": input}
|
|
194
|
+
]
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
2
|
|
|
3
3
|
import re
|
|
4
|
-
from typing import Any, Hashable,
|
|
4
|
+
from typing import Any, Hashable, Iterable, Tuple
|
|
5
5
|
|
|
6
6
|
from .types import Diffable
|
|
7
7
|
|
|
@@ -20,7 +20,9 @@ def escape(token: str) -> str:
|
|
|
20
20
|
|
|
21
21
|
|
|
22
22
|
class Pointer:
|
|
23
|
-
|
|
23
|
+
__slots__ = ("tokens",)
|
|
24
|
+
|
|
25
|
+
def __init__(self, tokens: Iterable[Hashable] | None = None) -> None:
|
|
24
26
|
if tokens is None:
|
|
25
27
|
tokens = []
|
|
26
28
|
self.tokens = tuple(tokens)
|
|
@@ -40,7 +42,7 @@ class Pointer:
|
|
|
40
42
|
return hash(self.tokens)
|
|
41
43
|
|
|
42
44
|
def __eq__(self, other: "Pointer") -> bool:
|
|
43
|
-
if
|
|
45
|
+
if other.__class__ != self.__class__:
|
|
44
46
|
return False
|
|
45
47
|
return self.tokens == other.tokens
|
|
46
48
|
|
|
@@ -48,17 +50,14 @@ class Pointer:
|
|
|
48
50
|
key = ""
|
|
49
51
|
parent = None
|
|
50
52
|
cursor = obj
|
|
51
|
-
|
|
52
|
-
parent = cursor
|
|
53
|
-
if hasattr(parent, "add"): # set
|
|
54
|
-
break
|
|
55
|
-
if hasattr(parent, "append"): # list
|
|
56
|
-
if key == "-":
|
|
57
|
-
break
|
|
53
|
+
if tokens := self.tokens:
|
|
58
54
|
try:
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
55
|
+
for key in tokens:
|
|
56
|
+
parent = cursor
|
|
57
|
+
cursor = parent[key]
|
|
58
|
+
except (KeyError, TypeError):
|
|
59
|
+
# KeyError for dicts, TypeError for sets and lists
|
|
60
|
+
pass
|
|
62
61
|
return parent, key, cursor
|
|
63
62
|
|
|
64
63
|
def append(self, token: Hashable) -> "Pointer":
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
[project]
|
|
2
2
|
name = "patchdiff"
|
|
3
|
-
version = "0.3.5"
|
|
3
|
+
version = "0.3.7"
|
|
4
4
|
description = "MIT"
|
|
5
5
|
authors = [
|
|
6
6
|
{ name = "Korijn van Golen", email = "korijn@gmail.com" },
|
|
@@ -18,6 +18,7 @@ dev = [
|
|
|
18
18
|
"pytest",
|
|
19
19
|
"pytest-cov",
|
|
20
20
|
"pytest-watch",
|
|
21
|
+
"pytest-benchmark",
|
|
21
22
|
]
|
|
22
23
|
|
|
23
24
|
[tool.ruff.lint]
|
|
@@ -1,141 +0,0 @@
|
|
|
1
|
-
from functools import partial, reduce
|
|
2
|
-
from typing import Dict, List, Set, Tuple
|
|
3
|
-
|
|
4
|
-
from .pointer import Pointer
|
|
5
|
-
from .types import Diffable
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
def diff_lists(input: List, output: List, ptr: Pointer) -> Tuple[List, List]:
|
|
9
|
-
memory = {(0, 0): {"ops": [], "rops": [], "cost": 0}}
|
|
10
|
-
|
|
11
|
-
def dist(i, j):
|
|
12
|
-
if (i, j) not in memory:
|
|
13
|
-
if i > 0 and j > 0 and input[i - 1] == output[j - 1]:
|
|
14
|
-
step = dist(i - 1, j - 1)
|
|
15
|
-
else:
|
|
16
|
-
paths = []
|
|
17
|
-
if i > 0:
|
|
18
|
-
base = dist(i - 1, j)
|
|
19
|
-
op = {"op": "remove", "idx": i - 1}
|
|
20
|
-
rop = {"op": "add", "idx": j - 1, "value": input[i - 1]}
|
|
21
|
-
paths.append(
|
|
22
|
-
{
|
|
23
|
-
"ops": base["ops"] + [op],
|
|
24
|
-
"rops": base["rops"] + [rop],
|
|
25
|
-
"cost": base["cost"] + 1,
|
|
26
|
-
}
|
|
27
|
-
)
|
|
28
|
-
if j > 0:
|
|
29
|
-
base = dist(i, j - 1)
|
|
30
|
-
op = {"op": "add", "idx": i - 1, "value": output[j - 1]}
|
|
31
|
-
rop = {"op": "remove", "idx": j - 1}
|
|
32
|
-
paths.append(
|
|
33
|
-
{
|
|
34
|
-
"ops": base["ops"] + [op],
|
|
35
|
-
"rops": base["rops"] + [rop],
|
|
36
|
-
"cost": base["cost"] + 1,
|
|
37
|
-
}
|
|
38
|
-
)
|
|
39
|
-
if i > 0 and j > 0:
|
|
40
|
-
base = dist(i - 1, j - 1)
|
|
41
|
-
op = {
|
|
42
|
-
"op": "replace",
|
|
43
|
-
"idx": i - 1,
|
|
44
|
-
"original": input[i - 1],
|
|
45
|
-
"value": output[j - 1],
|
|
46
|
-
}
|
|
47
|
-
rop = {
|
|
48
|
-
"op": "replace",
|
|
49
|
-
"idx": j - 1,
|
|
50
|
-
"original": output[j - 1],
|
|
51
|
-
"value": input[i - 1],
|
|
52
|
-
}
|
|
53
|
-
paths.append(
|
|
54
|
-
{
|
|
55
|
-
"ops": base["ops"] + [op],
|
|
56
|
-
"rops": base["rops"] + [rop],
|
|
57
|
-
"cost": base["cost"] + 1,
|
|
58
|
-
}
|
|
59
|
-
)
|
|
60
|
-
step = min(paths, key=lambda a: a["cost"])
|
|
61
|
-
memory[(i, j)] = step
|
|
62
|
-
return memory[(i, j)]
|
|
63
|
-
|
|
64
|
-
def pad(state, op, target=None):
|
|
65
|
-
ops, padding = state
|
|
66
|
-
if op["op"] == "add":
|
|
67
|
-
padded_idx = op["idx"] + 1 + padding
|
|
68
|
-
idx_token = padded_idx if padded_idx < len(target) + padding else "-"
|
|
69
|
-
full_op = {
|
|
70
|
-
"op": "add",
|
|
71
|
-
"path": ptr.append(idx_token),
|
|
72
|
-
"value": op["value"],
|
|
73
|
-
}
|
|
74
|
-
return [[*ops, full_op], padding + 1]
|
|
75
|
-
elif op["op"] == "remove":
|
|
76
|
-
full_op = {
|
|
77
|
-
"op": "remove",
|
|
78
|
-
"path": ptr.append(op["idx"] + padding),
|
|
79
|
-
}
|
|
80
|
-
return [[*ops, full_op], padding - 1]
|
|
81
|
-
else:
|
|
82
|
-
replace_ptr = ptr.append(op["idx"] + padding)
|
|
83
|
-
replace_ops, _ = diff(op["original"], op["value"], replace_ptr)
|
|
84
|
-
return [ops + replace_ops, padding]
|
|
85
|
-
|
|
86
|
-
solution = dist(len(input), len(output))
|
|
87
|
-
padded_ops, _ = reduce(partial(pad, target=input), solution["ops"], [[], 0])
|
|
88
|
-
padded_rops, _ = reduce(partial(pad, target=output), solution["rops"], [[], 0])
|
|
89
|
-
|
|
90
|
-
return padded_ops, padded_rops
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
def diff_dicts(input: Dict, output: Dict, ptr: Pointer) -> Tuple[List, List]:
|
|
94
|
-
ops, rops = [], []
|
|
95
|
-
input_keys = set(input.keys())
|
|
96
|
-
output_keys = set(output.keys())
|
|
97
|
-
for key in input_keys - output_keys:
|
|
98
|
-
ops.append({"op": "remove", "path": ptr.append(key)})
|
|
99
|
-
rops.insert(0, {"op": "add", "path": ptr.append(key), "value": input[key]})
|
|
100
|
-
for key in output_keys - input_keys:
|
|
101
|
-
ops.append(
|
|
102
|
-
{
|
|
103
|
-
"op": "add",
|
|
104
|
-
"path": ptr.append(key),
|
|
105
|
-
"value": output[key],
|
|
106
|
-
}
|
|
107
|
-
)
|
|
108
|
-
rops.insert(0, {"op": "remove", "path": ptr.append(key)})
|
|
109
|
-
for key in input_keys & output_keys:
|
|
110
|
-
key_ops, key_rops = diff(input[key], output[key], ptr.append(key))
|
|
111
|
-
ops.extend(key_ops)
|
|
112
|
-
key_rops.extend(rops)
|
|
113
|
-
rops = key_rops
|
|
114
|
-
return ops, rops
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
def diff_sets(input: Set, output: Set, ptr: Pointer) -> Tuple[List, List]:
|
|
118
|
-
ops, rops = [], []
|
|
119
|
-
for value in input - output:
|
|
120
|
-
ops.append({"op": "remove", "path": ptr.append(value)})
|
|
121
|
-
rops.insert(0, {"op": "add", "path": ptr.append("-"), "value": value})
|
|
122
|
-
for value in output - input:
|
|
123
|
-
ops.append({"op": "add", "path": ptr.append("-"), "value": value})
|
|
124
|
-
rops.insert(0, {"op": "remove", "path": ptr.append(value)})
|
|
125
|
-
return ops, rops
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
def diff(input: Diffable, output: Diffable, ptr: Pointer = None) -> Tuple[List, List]:
|
|
129
|
-
if input == output:
|
|
130
|
-
return [], []
|
|
131
|
-
if ptr is None:
|
|
132
|
-
ptr = Pointer()
|
|
133
|
-
if hasattr(input, "append") and hasattr(output, "append"): # list
|
|
134
|
-
return diff_lists(input, output, ptr)
|
|
135
|
-
if hasattr(input, "keys") and hasattr(output, "keys"): # dict
|
|
136
|
-
return diff_dicts(input, output, ptr)
|
|
137
|
-
if hasattr(input, "add") and hasattr(output, "add"): # set
|
|
138
|
-
return diff_sets(input, output, ptr)
|
|
139
|
-
return [{"op": "replace", "path": ptr, "value": output}], [
|
|
140
|
-
{"op": "replace", "path": ptr, "value": input}
|
|
141
|
-
]
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|