shrinkray 0.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- shrinkray/__init__.py +1 -0
- shrinkray/__main__.py +1205 -0
- shrinkray/learning.py +221 -0
- shrinkray/passes/__init__.py +0 -0
- shrinkray/passes/bytes.py +547 -0
- shrinkray/passes/clangdelta.py +230 -0
- shrinkray/passes/definitions.py +52 -0
- shrinkray/passes/genericlanguages.py +277 -0
- shrinkray/passes/json.py +91 -0
- shrinkray/passes/patching.py +280 -0
- shrinkray/passes/python.py +176 -0
- shrinkray/passes/sat.py +176 -0
- shrinkray/passes/sequences.py +69 -0
- shrinkray/problem.py +318 -0
- shrinkray/py.typed +0 -0
- shrinkray/reducer.py +430 -0
- shrinkray/work.py +217 -0
- shrinkray-0.0.0.dist-info/LICENSE +21 -0
- shrinkray-0.0.0.dist-info/METADATA +170 -0
- shrinkray-0.0.0.dist-info/RECORD +22 -0
- shrinkray-0.0.0.dist-info/WHEEL +4 -0
- shrinkray-0.0.0.dist-info/entry_points.txt +3 -0
|
@@ -0,0 +1,230 @@
|
|
|
1
|
+
import os
|
|
2
|
+
import subprocess
|
|
3
|
+
from glob import glob
|
|
4
|
+
from shutil import which
|
|
5
|
+
from tempfile import NamedTemporaryFile
|
|
6
|
+
|
|
7
|
+
import trio
|
|
8
|
+
|
|
9
|
+
from shrinkray.passes.definitions import ReductionPump
|
|
10
|
+
from shrinkray.problem import ReductionProblem
|
|
11
|
+
from shrinkray.work import NotFound
|
|
12
|
+
|
|
13
|
+
C_FILE_EXTENSIONS = (".c", ".cpp", ".h", ".hpp", ".cxx", ".cc")
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
def find_clang_delta():
    """Locate the ``clang_delta`` executable.

    ``PATH`` is consulted first. If nothing is found there, the Homebrew
    creduce install directories and ``/usr/libexec`` are globbed and the
    lexicographically greatest match is used.

    Returns the path as a string, or ``""`` when no candidate exists.
    """
    on_path = which("clang_delta")
    if on_path:
        return on_path

    candidates = [
        *glob("/opt/homebrew//Cellar/creduce/*/libexec/clang_delta"),
        *glob("/usr/libexec/clang_delta"),
    ]
    return max(candidates) if candidates else ""
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
# Transformation names understood by this module. ``ClangDelta`` validates
# requested transformations against this list before passing them to the
# executable as ``--query-instances=<name>`` / ``--transformation=<name>``.
TRANSFORMATIONS: list[str] = [
    "aggregate-to-scalar",
    "binop-simplification",
    "callexpr-to-value",
    "class-template-to-class",
    "combine-global-var",
    "combine-local-var",
    "copy-propagation",
    "empty-struct-to-int",
    "expression-detector",
    "instantiate-template-param",
    "instantiate-template-type-param-to-int",
    "lift-assignment-expr",
    "local-to-global",
    "move-function-body",
    "move-global-var",
    "param-to-global",
    "param-to-local",
    "reduce-array-dim",
    "reduce-array-size",
    "reduce-class-template-param",
    "reduce-pointer-level",
    "reduce-pointer-pairs",
    "remove-addr-taken",
    "remove-array",
    "remove-base-class",
    "remove-ctor-initializer",
    "remove-enum-member-value",
    "remove-namespace",
    "remove-nested-function",
    "remove-pointer",
    "remove-trivial-base-template",
    "remove-unresolved-base",
    "remove-unused-enum-member",
    "remove-unused-field",
    "remove-unused-function",
    "remove-unused-outer-class",
    "remove-unused-var",
    "rename-class",
    "rename-cxx-method",
    "rename-fun",
    "rename-param",
    "rename-var",
    "replace-array-access-with-index",
    "replace-array-index-var",
    "replace-callexpr",
    "replace-class-with-base-template-spec",
    "replace-dependent-name",
    "replace-dependent-typedef",
    "replace-derived-class",
    "replace-function-def-with-decl",
    "replace-one-level-typedef-type",
    "replace-simple-typedef",
    "replace-undefined-function",
    "return-void",
    "simple-inliner",
    "simplify-callexpr",
    "simplify-comma-expr",
    "simplify-dependent-typedef",
    "simplify-if",
    "simplify-nested-class",
    "simplify-recursive-template-instantiation",
    "simplify-struct",
    "simplify-struct-union-decl",
    "template-arg-to-int",
    "template-non-type-arg-to-int",
    "unify-function-decl",
    "union-to-struct",
    "vector-to-array",
]
|
|
97
|
+
|
|
98
|
+
|
|
99
|
+
class ClangDelta:
    """Async wrapper around a ``clang_delta`` executable.

    Each call writes the test case to a temporary ``.cpp`` file and runs the
    executable on it with ``trio.run_process``.
    """

    def __init__(self, path: str):
        """Remember the executable at ``path`` and the supported transformations."""
        self.path_to_exec = path

        # Copy the list so that editing one instance's transformations can
        # never mutate the shared module-level ``TRANSFORMATIONS``.
        self.transformations: list[str] = list(TRANSFORMATIONS)

    def __validate_transformation(self, transformation: str) -> None:
        """Raise ``ValueError`` unless ``transformation`` is a known name."""
        if transformation not in self.transformations:
            raise ValueError(f"Invalid transformation {transformation}")

    async def _run(self, data: bytes, *args: str) -> bytes:
        """Run the executable with ``args`` on a temp file holding ``data``.

        Returns the captured stdout. ``subprocess.CalledProcessError``
        propagates to the caller when the process exits unsuccessfully.
        """
        # ``delete_on_close=False`` lets us close the file (so the child
        # process can read it) while the context manager still removes it on
        # exit, so no manual unlink is required.
        with NamedTemporaryFile(suffix=".cpp", delete_on_close=False) as tmp:
            tmp.write(data)
            tmp.close()

            completed = await trio.run_process(
                [self.path_to_exec, *args, tmp.name],
                capture_stdout=True,
                capture_stderr=True,
            )
            return completed.stdout

    @staticmethod
    def _parse_instance_count(output: bytes) -> int:
        """Extract the integer from an ``Available transformation instances:`` reply.

        Raises ``ClangDeltaError`` when the output lacks the expected prefix.
        """
        prefix = b"Available transformation instances:"
        # Explicit check rather than ``assert``: this validates the output of
        # an external process and must not disappear under ``python -O``.
        if not output.startswith(prefix):
            raise ClangDeltaError(b"Unexpected clang_delta output: " + output)
        return int(output[len(prefix) :].strip().decode("ascii"))

    async def query_instances(self, transformation: str, data: bytes) -> int:
        """Return how many instances of ``transformation`` exist in ``data``.

        Raises:
            ValueError: unknown transformation, or the executable reported an
                unsupported file type.
            ClangDeltaError: any other failure of the executable. A failure
                whose output contains ``Assertion failed`` is reported as
                zero instances instead.
        """
        self.__validate_transformation(transformation)
        try:
            results = await self._run(data, f"--query-instances={transformation}")
        except subprocess.CalledProcessError as e:
            msg = (e.stdout + e.stderr).strip()
            if msg == b"Error: Unsupported file type!":
                raise ValueError("Not a C or C++ test case") from e
            elif b"Assertion failed" in msg:
                return 0
            else:
                raise ClangDeltaError(msg) from e

        return self._parse_instance_count(results)

    async def apply_transformation(
        self, transformation: str, counter: int, data: bytes
    ) -> bytes:
        """Apply instance number ``counter`` of ``transformation`` to ``data``.

        Returns the transformed source, or ``data`` unchanged when the
        executable reports that it made no modification.

        Raises:
            ValueError: unknown transformation, or the executable reported an
                unsupported file type.
            ClangDeltaError: any other failure of the executable.
        """
        self.__validate_transformation(transformation)
        try:
            return await self._run(
                data,
                f"--transformation={transformation}",
                f"--counter={int(counter)}",
            )
        except subprocess.CalledProcessError as e:
            stdout = e.stdout.strip()
            if stdout == b"Error: Unsupported file type!":
                raise ValueError("Not a C or C++ test case") from e
            elif stdout == b"Error: No modification to the transformed program!":
                return data
            else:
                raise ClangDeltaError(e.stdout + e.stderr) from e
|
|
175
|
+
|
|
176
|
+
|
|
177
|
+
class ClangDeltaError(Exception):
    """Raised when the ``clang_delta`` executable fails in an unexpected way.

    The first argument is the raw output (bytes) captured from the process.
    """
|
|
179
|
+
|
|
180
|
+
|
|
181
|
+
def clang_delta_pump(
    clang_delta: ClangDelta, transformation: str
) -> ReductionPump[bytes]:
    """Build a pump that repeatedly applies ``transformation`` via ``clang_delta``.

    The returned coroutine function starts from the problem's current test
    case, finds the first transformation instance whose result differs from
    the current target and is interesting, applies it, re-queries the number
    of instances, and continues from that index until none is found. It
    returns the final target.
    """

    async def apply(problem: ReductionProblem[bytes]) -> bytes:
        target = problem.current_test_case
        assert target is not None
        try:
            n = await clang_delta.query_instances(transformation, target)
        except ValueError:
            # Raised for inputs clang_delta rejects (e.g. not C/C++): report
            # it and leave the test case untouched.
            import traceback

            traceback.print_exc()
            return target
        i = 1
        while i <= n:

            async def can_apply(j: int) -> bool:
                attempt = await clang_delta.apply_transformation(
                    transformation, j, target
                )
                assert attempt is not None
                if attempt == target:
                    return False
                return await problem.is_interesting(attempt)

            try:
                i = await problem.work.find_first_value(range(i, n + 1), can_apply)
            except NotFound:
                break
            except ClangDeltaError as e:
                # Clang delta has a large number of internal assertions that you can trigger
                # if you feed it bad enough C++. We solve this problem by ignoring it.
                if b"Assertion failed" in e.args[0]:
                    return target
                # Any other failure must not fall through: ``i`` would still
                # hold a stale index that was never verified as interesting.
                raise

            target = await clang_delta.apply_transformation(transformation, i, target)
            assert target is not None
            n = await clang_delta.query_instances(transformation, target)
        return target

    apply.__name__ = f"clang_delta({transformation})"

    return apply
|
|
224
|
+
|
|
225
|
+
|
|
226
|
+
def clang_delta_pumps(clang_delta: ClangDelta) -> list[ReductionPump[bytes]]:
    """Return one pump per entry of ``clang_delta.transformations``, in order."""
    pumps = []
    for name in clang_delta.transformations:
        pumps.append(clang_delta_pump(clang_delta, name))
    return pumps
|
|
@@ -0,0 +1,52 @@
|
|
|
1
|
+
from abc import ABC, abstractmethod
|
|
2
|
+
from functools import wraps
|
|
3
|
+
from typing import Awaitable, Callable, Generic, TypeVar
|
|
4
|
+
|
|
5
|
+
from shrinkray.problem import ReductionProblem
|
|
6
|
+
|
|
7
|
+
S = TypeVar("S")
|
|
8
|
+
T = TypeVar("T")
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
ReductionPass = Callable[[ReductionProblem[T]], Awaitable[None]]
|
|
12
|
+
ReductionPump = Callable[[ReductionProblem[T]], Awaitable[T]]
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
class ParseError(Exception):
    """Raised by ``Format.parse`` implementations when the input is not valid."""
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
class Format(Generic[S, T], ABC):
    """Two-way conversion between a serialised form ``S`` and a parsed form ``T``."""

    @property
    def name(self) -> str:
        """Label for this format; defaults to ``repr(self)``."""
        return repr(self)

    @abstractmethod
    def parse(self, input: S) -> T:
        """Convert ``input`` into its parsed form (implemented by subclasses)."""

    def is_valid(self, input: S) -> bool:
        """Return whether ``parse`` accepts ``input`` without a ``ParseError``."""
        try:
            self.parse(input)
        except ParseError:
            return False
        else:
            return True

    @abstractmethod
    def dumps(self, input: T) -> S:
        """Convert a parsed value back into its serialised form."""
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
def compose(format: Format[S, T], reduction_pass: ReductionPass[T]) -> ReductionPass[S]:
    """Lift ``reduction_pass`` so it runs on ``problem.view(format)``.

    The wrapped pass does nothing when the current test case cannot be
    parsed by ``format``. Its ``__name__`` is
    ``"<format.name>/<reduction_pass.__name__>"``.
    """
    composed_name = f"{format.name}/{reduction_pass.__name__}"

    @wraps(reduction_pass)
    async def wrapped_pass(problem: ReductionProblem[S]) -> None:
        view = problem.view(format)

        try:
            # Accessing the attribute is what may raise ParseError; the value
            # itself is not needed here.
            view.current_test_case
        except ParseError:
            pass
        else:
            await reduction_pass(view)

    wrapped_pass.__name__ = composed_name

    return wrapped_pass
|
|
@@ -0,0 +1,277 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Module of reduction passes designed for "things that look like programming languages".
|
|
3
|
+
"""
|
|
4
|
+
|
|
5
|
+
import re
|
|
6
|
+
from functools import wraps
|
|
7
|
+
from string import ascii_lowercase, ascii_uppercase
|
|
8
|
+
from typing import AnyStr, Callable
|
|
9
|
+
|
|
10
|
+
import trio
|
|
11
|
+
from attr import define
|
|
12
|
+
|
|
13
|
+
from shrinkray.passes.bytes import ByteReplacement, delete_intervals
|
|
14
|
+
from shrinkray.passes.definitions import Format, ParseError, ReductionPass
|
|
15
|
+
from shrinkray.passes.patching import PatchApplier, Patches, apply_patches
|
|
16
|
+
from shrinkray.problem import BasicReductionProblem, ReductionProblem
|
|
17
|
+
from shrinkray.work import NotFound
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
@define(frozen=True)
class Substring(Format[AnyStr, AnyStr]):
    """Format exposing the part of a string between a fixed prefix and suffix."""

    # Text that must appear at the start of every valid input.
    prefix: AnyStr
    # Text that must appear at the end of every valid input.
    suffix: AnyStr

    @property
    def name(self) -> str:
        """Label of the form ``Substring(<len(prefix)>, <len(suffix)>)``."""
        return f"Substring({len(self.prefix)}, {len(self.suffix)})"

    def parse(self, input: AnyStr) -> AnyStr:
        """Return ``input`` with the prefix and suffix removed.

        Raises ``ParseError`` when ``input`` does not start with the prefix
        and end with the suffix, or is too short to hold both without
        overlap.
        """
        # Without the length check an input such as "aba" with prefix "ab"
        # and suffix "ba" would parse to "" and dump back as "abba", so
        # parse/dumps would not round-trip.
        if (
            len(input) >= len(self.prefix) + len(self.suffix)
            and input.startswith(self.prefix)
            and input.endswith(self.suffix)
        ):
            return input[len(self.prefix) : len(input) - len(self.suffix)]
        else:
            raise ParseError()

    def dumps(self, input: AnyStr) -> AnyStr:
        """Re-attach the prefix and suffix around ``input``."""
        return self.prefix + input + self.suffix
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
class RegionReplacingPatches(Patches[dict[int, AnyStr], AnyStr]):
    """Patches that replace selected ``(start, end)`` regions of a sequence.

    A patch is a dict mapping a region's index in ``regions`` to the
    replacement content for that region.
    """

    def __init__(self, regions):
        """Store ``regions``, a non-empty list of ordered, non-overlapping spans."""
        assert regions
        for (_, v), (u, _) in zip(regions, regions[1:]):
            assert v <= u
        self.regions = regions

    @property
    def empty(self):
        """The patch that replaces nothing."""
        return {}

    def combine(self, *patches):
        """Merge patches; for a region present in several, the last one wins."""
        result = {}
        for p in patches:
            result.update(p)
        return result

    def apply(self, patch, target):
        """Return ``target`` with every patched region replaced."""
        empty = target[:0]
        parts = []
        prev = 0
        for j, (u, v) in enumerate(self.regions):
            assert v <= len(target)
            # Content between the previous region and this one is kept as is.
            parts.append(target[prev:u])
            try:
                parts.append(patch[j])
            except KeyError:
                parts.append(target[u:v])
            prev = v
        parts.append(target[prev:])
        return empty.join(parts)

    def size(self, patch):
        """Return the total number of elements ``patch`` removes.

        Summed over every region in the patch; an empty patch has size 0.
        """
        total = 0
        for i, s in patch.items():
            u, v = self.regions[i]
            # Accumulate; returning here would report only the first region
            # and yield None for an empty patch.
            total += v - u - len(s)
        return total
|
|
76
|
+
|
|
77
|
+
|
|
78
|
+
def regex_pass(
    pattern: AnyStr | re.Pattern[AnyStr],
    flags: re.RegexFlag = 0,
) -> Callable[[ReductionPass[AnyStr]], ReductionPass[AnyStr]]:
    """Decorator factory that runs a pass on each region matching ``pattern``.

    The decorated pass receives a sub-problem whose test case is one matched
    region; a candidate is interesting there when substituting it for that
    region is accepted by the patch applier of the outer problem.

    Args:
        pattern: Regular expression (source or pre-compiled) selecting the
            non-overlapping regions to reduce.
        flags: Flags used when ``pattern`` still has to be compiled.
    """
    if not isinstance(pattern, re.Pattern):
        pattern = re.compile(pattern, flags=flags)

    def inner(fn: ReductionPass[AnyStr]) -> ReductionPass[AnyStr]:
        @wraps(fn)
        async def reduction_pass(problem: ReductionProblem[AnyStr]) -> None:
            matching_regions = []
            initial_values_for_regions = []

            # Nothing is awaited while scanning, so one snapshot is enough.
            source = problem.current_test_case

            i = 0
            while i < len(source):
                search = pattern.search(source, i)
                if search is None:
                    break

                u, v = search.span()
                if u == v:
                    # A zero-width match has nothing to reduce, and resuming
                    # the search at ``v`` would find it again forever.
                    i = v + 1
                    continue

                matching_regions.append((u, v))
                initial_values_for_regions.append(source[u:v])

                i = v

            if not matching_regions:
                return

            patches = RegionReplacingPatches(matching_regions)

            patch_applier = PatchApplier(patches, problem)

            async with trio.open_nursery() as nursery:

                async def reduce_region(i: int) -> None:
                    async def is_interesting(s):
                        return await patch_applier.try_apply_patch({i: s})

                    subproblem = BasicReductionProblem(
                        initial_values_for_regions[i],
                        is_interesting,
                        work=problem.work,
                    )
                    nursery.start_soon(fn, subproblem)

                for i in range(len(matching_regions)):
                    await reduce_region(i)

        return reduction_pass

    return inner
|
|
129
|
+
|
|
130
|
+
|
|
131
|
+
async def reduce_integer(problem: ReductionProblem[int]) -> None:
    """Search for a smaller interesting non-negative integer.

    Zero is tried first. Otherwise each round probes the midpoint of the
    remaining ``(lower, upper)`` range, then ``upper - 1``, then
    ``lower + 1``, stopping as soon as ``lower + 1`` is interesting or the
    range is exhausted.
    """
    assert problem.current_test_case >= 0

    if await problem.is_interesting(0):
        return

    lower, upper = 0, problem.current_test_case

    while upper - lower > 1:
        midpoint = (lower + upper) // 2
        if await problem.is_interesting(midpoint):
            upper = midpoint
        else:
            lower = midpoint

        # Opportunistically tighten the upper bound by one.
        if await problem.is_interesting(upper - 1):
            upper -= 1

        # Probe just above the lower bound; stop if it is interesting.
        if await problem.is_interesting(lower + 1):
            return
        lower += 1
|
|
154
|
+
|
|
155
|
+
|
|
156
|
+
class IntegerFormat(Format[bytes, int]):
    """Format viewing ASCII-encoded bytes as a Python ``int``."""

    def parse(self, input: bytes) -> int:
        """Decode ``input`` as ASCII and convert it to an integer.

        Raises ``ParseError`` when decoding or conversion fails.
        """
        try:
            text = input.decode("ascii")
            return int(text)
        except (ValueError, UnicodeDecodeError):
            raise ParseError()

    def dumps(self, input: int) -> bytes:
        """Render ``input`` with ``str`` and encode it as ASCII."""
        rendered = str(input)
        return rendered.encode("ascii")
|
|
165
|
+
|
|
166
|
+
|
|
167
|
+
@regex_pass(b"[0-9]+")
async def reduce_integer_literals(problem: ReductionProblem[bytes]) -> None:
    """Run ``reduce_integer`` on every run of decimal digits in the test case."""
    as_integer = problem.view(IntegerFormat())
    await reduce_integer(as_integer)
|
|
170
|
+
|
|
171
|
+
|
|
172
|
+
# NB: the hyphen is escaped. Unescaped, ``+-/`` is a character *range* that
# also matches "," and ".", which are not arithmetic operators.
@regex_pass(rb"[0-9]+ [*+\-/] [0-9]+")
async def combine_expressions(problem: ReductionProblem[bytes]) -> None:
    """Try replacing ``<int> <op> <int>`` with the value it evaluates to.

    The operands are parsed with ``int`` and combined directly instead of
    going through ``eval``, so literals with leading zeros (e.g. ``010 + 1``)
    no longer raise ``SyntaxError``. Division is true division, exactly as
    the previous ``eval``-based version computed it. Expressions that cannot
    be evaluated (division by zero, overflow, unparsable operands) are left
    untouched.
    """
    left, _, rest = problem.current_test_case.partition(b" ")
    symbol, _, right = rest.partition(b" ")

    try:
        a = int(left.decode("ascii"))
        b = int(right.decode("ascii"))
        if symbol == b"+":
            value = a + b
        elif symbol == b"-":
            value = a - b
        elif symbol == b"*":
            value = a * b
        elif symbol == b"/":
            value = a / b
        else:
            return
        await problem.is_interesting(str(value).encode("ascii"))
    except (ArithmeticError, ValueError):
        # ArithmeticError: division by zero / float overflow.
        # ValueError: operands or result that cannot be converted.
        pass
|
|
183
|
+
|
|
184
|
+
|
|
185
|
+
@regex_pass(rb'([\'"])\s*\1')
async def merge_adjacent_strings(problem: ReductionProblem[bytes]) -> None:
    """Try deleting each pair of identical quotes separated only by whitespace."""
    nothing = b""
    await problem.is_interesting(nothing)
|
|
188
|
+
|
|
189
|
+
|
|
190
|
+
@regex_pass(rb"''|\"\"|false|\(\)|\[\]", re.IGNORECASE)
async def replace_falsey_with_zero(problem: ReductionProblem[bytes]) -> None:
    """Try replacing empty strings, ``false``, ``()`` and ``[]`` with ``0``."""
    zero = b"0"
    await problem.is_interesting(zero)
|
|
193
|
+
|
|
194
|
+
|
|
195
|
+
async def simplify_brackets(problem: ReductionProblem[bytes]) -> None:
    """Try rewriting bracket pairs into a bytewise-smaller bracket pair.

    Each patch maps the opening and closing byte of one bracket kind to
    those of another kind that compares lower.
    """
    bracket_types = [b"[]", b"{}", b"()"]

    patches = []
    for source_pair in bracket_types:
        for replacement_pair in bracket_types:
            if source_pair > replacement_pair:
                # Zipping two bytes objects pairs up their integer byte values.
                patches.append(dict(zip(source_pair, replacement_pair)))

    await apply_patches(problem, ByteReplacement(), patches)
|
|
201
|
+
|
|
202
|
+
|
|
203
|
+
IDENTIFIER = re.compile(rb"(\b[A-Za-z][A-Za-z0-9_]*\b)|([0-9]+)")
|
|
204
|
+
|
|
205
|
+
|
|
206
|
+
def shortlex(s):
    """Sort key ordering values by length first, then by their own ordering."""
    length = len(s)
    return length, s
|
|
208
|
+
|
|
209
|
+
|
|
210
|
+
async def normalize_identifiers(problem: ReductionProblem[bytes]) -> None:
    """Try renaming each identifier to a shortlex-smaller one.

    Candidate names are the identifiers already present plus the first
    unused lowercase letter and the first unused uppercase letter. Targets
    are visited from shortlex-largest to smallest; for each, the first
    candidate that keeps the test case interesting is applied.
    """
    found = set()
    for match in IDENTIFIER.finditer(problem.current_test_case):
        found.add(match.group(0))

    candidates = set(found)
    for alphabet in (ascii_lowercase, ascii_uppercase):
        unused = next(
            (
                bytes([code])
                for code in alphabet.encode("ascii")
                if bytes([code]) not in candidates
            ),
            None,
        )
        if unused is not None:
            candidates.add(unused)

    ordered_candidates = sorted(candidates, key=shortlex)

    # TODO: This could use better parallelisation.
    for t in sorted(found, key=shortlex, reverse=True):
        pattern = re.compile(rb"\b" + t + rb"\b")
        source = problem.current_test_case
        if pattern.search(source) is None:
            # An earlier rename already removed this identifier.
            continue

        async def can_replace(r):
            # Only strictly smaller names count as a reduction.
            if not shortlex(r) < shortlex(t):
                return False
            attempt = pattern.sub(r, source)
            assert attempt != source
            return await problem.is_interesting(attempt)

        try:
            await problem.work.find_first_value(ordered_candidates, can_replace)
        except NotFound:
            pass
|
|
242
|
+
|
|
243
|
+
|
|
244
|
+
def iter_indices(s, substring):
    """Yield every index at which ``substring`` occurs in ``s``, in order.

    Overlapping occurrences are included, since each search resumes one
    position after the previous hit.
    """
    position = s.find(substring)
    while position != -1:
        yield position
        position = s.find(substring, position + 1)
|
|
253
|
+
|
|
254
|
+
|
|
255
|
+
async def cut_comments(problem: ReductionProblem[bytes], start, end, include_end=True):
    """Try deleting every span that begins with ``start`` and runs up to ``end``.

    With ``include_end`` true the terminator is deleted too, and spans with
    no terminator are skipped. With it false the terminator is kept, and a
    span with no terminator extends to the end of the test case.
    """
    target = problem.current_test_case
    cuts = []
    for begin in iter_indices(target, start):
        try:
            finish = target.index(end, begin + 1)
        except ValueError:
            if include_end:
                continue
            finish = len(target)
        cuts.append((begin, finish + len(end) if include_end else finish))
    await delete_intervals(problem, cuts)
|
|
271
|
+
|
|
272
|
+
|
|
273
|
+
async def cut_comment_like_things(problem: ReductionProblem[bytes]):
    """Try removing ``#`` and ``//`` line comments, then ``\"\"\"`` and ``/* */`` blocks."""
    # Line comments: keep the newline that terminates them.
    for marker in (b"#", b"//"):
        await cut_comments(problem, marker, b"\n", include_end=False)

    # Delimited blocks: remove the closing delimiter as well.
    for opener, closer in ((b'"""', b'"""'), (b"/*", b"*/")):
        await cut_comments(problem, opener, closer)
|
shrinkray/passes/json.py
ADDED
|
@@ -0,0 +1,91 @@
|
|
|
1
|
+
import json
|
|
2
|
+
from copy import deepcopy
|
|
3
|
+
from typing import Any
|
|
4
|
+
|
|
5
|
+
from attrs import define
|
|
6
|
+
|
|
7
|
+
from shrinkray.passes.definitions import Format, ParseError, ReductionPass
|
|
8
|
+
from shrinkray.passes.patching import Patches, apply_patches
|
|
9
|
+
from shrinkray.problem import ReductionProblem
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
def is_json(s: bytes) -> bool:
    """Return whether ``json.loads`` accepts ``s`` without a ``ValueError``."""
    try:
        json.loads(s)
    except ValueError:
        return False
    else:
        return True
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
@define(frozen=True)
class _JSON(Format[bytes, Any]):
    """Format that views bytes as the value produced by ``json.loads``."""

    def __repr__(self) -> str:
        return "JSON"

    @property
    def name(self) -> str:
        """Fixed label ``"JSON"``."""
        return "JSON"

    def parse(self, input: bytes) -> Any:
        """Decode ``input`` as JSON, raising ``ParseError`` when it is invalid."""
        try:
            decoded = json.loads(input)
        except (json.JSONDecodeError, UnicodeDecodeError) as e:
            # Re-raise with the same arguments under the Format error type.
            raise ParseError(*e.args)
        return decoded

    def dumps(self, input: Any) -> bytes:
        """Serialise ``input`` with ``json.dumps`` and encode it as UTF-8."""
        text = json.dumps(input)
        return text.encode("utf-8")
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
JSON = _JSON()
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
def gather_identifiers(value: Any) -> set[str]:
    """Collect every dict key found anywhere inside ``value``.

    Dicts and lists are traversed iteratively; other values are ignored.
    """
    keys = set()
    pending = [value]
    while pending:
        node = pending.pop()
        if isinstance(node, list):
            pending.extend(node)
        elif isinstance(node, dict):
            keys.update(node.keys())
            pending.extend(node.values())
    return keys
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
class DeleteIdentifiers(Patches[frozenset[str], Any]):
    """Patches that delete a set of keys from every dict in a JSON-like value."""

    @property
    def empty(self) -> frozenset[str]:
        """The patch that deletes nothing."""
        return frozenset()

    def combine(self, *patches: frozenset[str]) -> frozenset[str]:
        """Return the union of all the given key sets."""
        return frozenset().union(*patches)

    def apply(self, patch: frozenset[str], target: Any) -> Any:
        """Return a deep copy of ``target`` with the keys in ``patch`` removed."""
        result = deepcopy(target)
        pending = [result]
        while pending:
            node = pending.pop()
            if isinstance(node, list):
                pending.extend(node)
            elif isinstance(node, dict):
                for key in patch:
                    node.pop(key, None)
                # Descend only into the values that survived the deletion.
                pending.extend(node.values())
        return result

    def size(self, patch: frozenset[str]) -> int:
        """Number of keys the patch deletes."""
        return len(patch)
|
|
81
|
+
|
|
82
|
+
|
|
83
|
+
async def delete_identifiers(problem: ReductionProblem[Any]):
    """Try deleting each dict key that occurs in the current test case."""
    single_key_patches = [
        frozenset({key}) for key in gather_identifiers(problem.current_test_case)
    ]
    await apply_patches(problem, DeleteIdentifiers(), single_key_patches)
|
|
89
|
+
|
|
90
|
+
|
|
91
|
+
JSON_PASSES: list[ReductionPass[Any]] = [delete_identifiers]
|