shrinkray 0.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- shrinkray/__init__.py +1 -0
- shrinkray/__main__.py +1205 -0
- shrinkray/learning.py +221 -0
- shrinkray/passes/__init__.py +0 -0
- shrinkray/passes/bytes.py +547 -0
- shrinkray/passes/clangdelta.py +230 -0
- shrinkray/passes/definitions.py +52 -0
- shrinkray/passes/genericlanguages.py +277 -0
- shrinkray/passes/json.py +91 -0
- shrinkray/passes/patching.py +280 -0
- shrinkray/passes/python.py +176 -0
- shrinkray/passes/sat.py +176 -0
- shrinkray/passes/sequences.py +69 -0
- shrinkray/problem.py +318 -0
- shrinkray/py.typed +0 -0
- shrinkray/reducer.py +430 -0
- shrinkray/work.py +217 -0
- shrinkray-0.0.0.dist-info/LICENSE +21 -0
- shrinkray-0.0.0.dist-info/METADATA +170 -0
- shrinkray-0.0.0.dist-info/RECORD +22 -0
- shrinkray-0.0.0.dist-info/WHEEL +4 -0
- shrinkray-0.0.0.dist-info/entry_points.txt +3 -0
shrinkray/learning.py
ADDED
|
@@ -0,0 +1,221 @@
|
|
|
1
|
+
import hashlib
|
|
2
|
+
import os
|
|
3
|
+
import random
|
|
4
|
+
import re
|
|
5
|
+
import site
|
|
6
|
+
import subprocess
|
|
7
|
+
import sys
|
|
8
|
+
from glob import glob
|
|
9
|
+
from typing import Awaitable, Callable
|
|
10
|
+
|
|
11
|
+
import trio
|
|
12
|
+
from attrs import define
|
|
13
|
+
|
|
14
|
+
from shrinkray.problem import BasicReductionProblem
|
|
15
|
+
from shrinkray.reducer import ShrinkRay
|
|
16
|
+
from shrinkray.work import WorkContext
|
|
17
|
+
|
|
18
|
+
# Matches one or more consecutive whitespace bytes (bytes pattern).
WHITESPACE = re.compile(rb"\s+")


def whitespace_normalize(s):
    """Collapse whitespace runs in the byte string *s* and trim its ends.

    Every run matched by ``WHITESPACE`` is replaced by a single space
    byte, then leading and trailing whitespace is stripped.
    """
    collapsed = WHITESPACE.sub(b" ", s)
    return collapsed.strip()
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
def threshold_value(seed: bytes, test_case: bytes) -> float:
    """Return a deterministic pseudo-random float for a (seed, test case) pair.

    The SHA-1 digest of ``seed + b":" + test_case`` seeds a fresh
    ``random.Random`` and that generator's first ``random()`` draw is
    returned, so equal inputs always produce the same value.
    """
    material = seed + b":" + test_case
    digest = hashlib.sha1(material).digest()
    generator = random.Random(digest)
    return generator.random()
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
@define
class RandomInterestingnessTest:
    """Interestingness test that narrows a base test with two extra filters.

    A test case is accepted only when its ``threshold_value`` under
    ``seed`` does not exceed ``threshold``, its whitespace-normalized form
    contains ``key_substring``, and ``base_interestingness`` accepts it.
    """

    # Seed passed to threshold_value together with each test case.
    seed: bytes
    # Test cases whose threshold_value is above this are rejected.
    threshold: float
    # Byte string that must occur in the whitespace-normalized test case.
    key_substring: bytes
    # Underlying asynchronous test, consulted only after both filters pass.
    base_interestingness: Callable[[bytes], Awaitable[bool]]

    async def __call__(self, test_case: bytes) -> bool:
        """Return whether *test_case* passes both filters and the base test."""
        # ``or`` short-circuits, so normalization only happens once the
        # threshold filter has passed.
        rejected = threshold_value(
            self.seed, test_case
        ) > self.threshold or self.key_substring not in whitespace_normalize(
            test_case
        )
        if rejected:
            # Still yield to the scheduler on the rejection path.
            await trio.lowlevel.checkpoint()
            return False
        return await self.base_interestingness(test_case)
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
class CouldNotLearn(Exception):
    """Exception type defined by the learning module.

    NOTE(review): presumably signals that learning could not proceed;
    confirm against the call sites that raise it.
    """
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
class CorpusExplorer:
    """Samples interesting members of a file corpus and shrinks them.

    Corpus files are read lazily: a file is only loaded (and tested with
    the interestingness predicate) when sampling first picks it, after
    which its contents are kept in memory if they were interesting.
    Interestingness results are cached per test case.
    """

    def __init__(
        self,
        is_interesting: Callable[[bytes], Awaitable[bool]],
        corpus: list[str],
        parallelism=os.cpu_count(),
    ):
        """Create an explorer over *corpus*.

        Args:
            is_interesting: Asynchronous predicate applied to test cases.
            corpus: Paths of candidate files; empty files are dropped.
            parallelism: Maximum number of concurrent predicate calls and
                the number of sampling tasks. ``None`` is treated as 1.
        """
        if parallelism is None:
            # os.cpu_count() may return None, which CapacityLimiter rejects.
            parallelism = 1
        self.__is_interesting = is_interesting
        self.__is_interesting_cache = {}
        self.__corpus_files = [t for t in corpus if os.stat(t).st_size > 0]
        self.__corpus_data = []
        self.__limit = trio.CapacityLimiter(parallelism)
        # Sentinel stored in the cache while a result is being computed.
        self.__wait_for_me = object()
        self.__random = random.Random()
        self.__parallelism = parallelism

    async def is_interesting(self, tc):
        """Return the cached-or-computed interestingness of *tc*.

        If another task is already computing the answer for *tc*, poll
        until it is available rather than running the predicate twice.
        """
        cache = self.__is_interesting_cache
        pending = self.__wait_for_me
        while True:
            try:
                cached = cache[tc]
            except KeyError:
                # Nobody has computed (or is computing) this one yet.
                break
            if cached is not pending:
                await trio.lowlevel.checkpoint()
                return cached
            await trio.sleep(0.01)

        cache[tc] = pending
        try:
            async with self.__limit:
                result = await self.__is_interesting(tc)
            cache[tc] = result
        finally:
            # If the predicate raised or was cancelled, drop the sentinel
            # so that waiting tasks retry instead of polling forever.
            if cache.get(tc) is pending:
                del cache[tc]
        return result

    async def random_corpus_member(self):
        """Return one randomly chosen interesting corpus member.

        Raises:
            IndexError: If no interesting member could be found.
        """
        return (await self.random_corpus_sample(1))[0]

    async def random_corpus_sample(self, max_elements=100):
        """Return up to *max_elements* distinct interesting corpus members.

        Draws without replacement from the union of not-yet-loaded files
        and members already loaded before this call. Newly loaded files
        that are interesting are appended to the in-memory corpus.
        """
        result = []
        # Only members loaded before this call are eligible for the
        # "already loaded" branch; newly appended ones sit past this bound.
        max_from_data = len(self.__corpus_data)

        async def run():
            nonlocal max_from_data
            while len(result) < max_elements and (
                max_from_data > 0 or self.__corpus_files
            ):
                i = self.__random.randrange(0, len(self.__corpus_files) + max_from_data)
                if i < len(self.__corpus_files):
                    # Swap the chosen file to the end and pop it so it is
                    # never loaded twice.
                    j = len(self.__corpus_files) - 1
                    self.__corpus_files[i], self.__corpus_files[j] = (
                        self.__corpus_files[j],
                        self.__corpus_files[i],
                    )
                    f = self.__corpus_files.pop()
                    with open(f, "rb") as infile:
                        data = infile.read()

                    if await self.is_interesting(data):
                        self.__corpus_data.append(data)
                        result.append(data)
                else:
                    # Swap the chosen member to the end of the eligible
                    # prefix and shrink the prefix (sampling w/o replacement).
                    i -= len(self.__corpus_files)
                    j = max_from_data - 1
                    self.__corpus_data[i], self.__corpus_data[j] = (
                        self.__corpus_data[j],
                        self.__corpus_data[i],
                    )
                    result.append(self.__corpus_data[j])
                    max_from_data -= 1
                    await trio.lowlevel.checkpoint()

        if self.__parallelism > 1:
            async with trio.open_nursery() as nursery:
                for _ in range(self.__parallelism):
                    nursery.start_soon(run)
        else:
            await run()
        # Concurrent tasks may overshoot slightly; trim to the requested size.
        return result[:max_elements]

    async def random_reduction_problem(self):
        """Build a random reduction problem from the corpus.

        Returns:
            A pair ``(initial, test)`` where ``initial`` is an interesting
            corpus member and ``test`` is a ``RandomInterestingnessTest``
            derived from it. The seed, threshold and key substring are
            chosen so that ``initial`` passes both filters of ``test``.

        Raises:
            CouldNotLearn: If no interesting corpus member is available.
        """
        while True:
            candidates = await self.random_corpus_sample(1)
            if not candidates:
                raise CouldNotLearn("no interesting corpus members available")
            initial = candidates[0]
            normalized = whitespace_normalize(initial)

            if not normalized:
                continue
            # Pick a random non-whitespace start position ...
            while True:
                start = self.__random.randrange(0, len(normalized))
                if not WHITESPACE.match(normalized[start : start + 1]):
                    break
            # ... and a non-whitespace end position at most 20 bytes later.
            while True:
                end = self.__random.randint(start, min(start + 20, len(normalized) - 1))
                if not WHITESPACE.match(normalized[end : end + 1]):
                    break
            # ``end`` is inclusive: the byte just validated belongs to the
            # key, which also guarantees the key is never empty.
            substring = normalized[start : end + 1]
            # Find a seed under which ``initial`` scores at least 0.5, then
            # choose a threshold in [value, 1.0) so that ``initial`` passes.
            while True:
                seed = str(self.__random.randint(0, 10**6)).encode("ascii")
                value = threshold_value(seed=seed, test_case=initial)
                if value >= 0.5:
                    threshold = self.__random.random() * (1.0 - value) + value
                    break

            return initial, RandomInterestingnessTest(
                seed=seed,
                threshold=threshold,
                key_substring=substring,
                base_interestingness=self.is_interesting,
            )

    async def full_shrink(self, initial, condition) -> bytes:
        """Run a ShrinkRay reducer on *initial* under *condition*.

        Returns the reduction problem's current test case once the
        reducer has finished running.
        """
        problem: BasicReductionProblem[bytes] = BasicReductionProblem(
            initial=initial,
            is_interesting=condition,
            work=WorkContext(parallelism=self.__parallelism, random=self.__random),
        )

        reducer = ShrinkRay(
            target=problem,
        )

        await reducer.run()

        return problem.current_test_case

    async def all_shrinks(self, sample, condition):
        """Shrink every element of *sample* concurrently.

        Returns the set of distinct results of ``full_shrink``.
        """
        shrinks = set()

        async def run_and_add(c, condition):
            shrinks.add(await self.full_shrink(c, condition))

        async with trio.open_nursery() as nursery:
            for d in sample:
                nursery.start_soon(run_and_add, d, condition)

        return shrinks
|
|
195
|
+
|
|
196
|
+
|
|
197
|
+
def python_files() -> list[str]:
    """List every ``*.py`` file found under the site-packages directories.

    Each directory reported by ``site.getsitepackages()`` is searched
    recursively; results are concatenated in directory order.
    """
    found: list[str] = []
    for directory in site.getsitepackages():
        pattern = os.path.join(directory, "**", "*.py")
        found.extend(glob(pattern, recursive=True))
    return found
|
|
203
|
+
|
|
204
|
+
|
|
205
|
+
# Directory two levels above the directory containing this module.
ROOT = os.path.join(os.path.dirname(__file__), "..", "..")
ROOT = os.path.abspath(ROOT)

# Helper script that is_python() runs in a subprocess.
IS_PYTHON_SCRIPT = os.path.join(ROOT, "scripts", "ispython.py")

# Fail at import time if the helper script is missing.
# NOTE(review): this assumes a source-checkout layout; in an installed
# package the script may be absent and importing would fail - confirm.
assert os.path.exists(IS_PYTHON_SCRIPT), ROOT
|
|
210
|
+
|
|
211
|
+
|
|
212
|
+
async def is_python(data):
    """Return True when the ``ispython.py`` helper exits with status 0.

    *data* is fed to the helper's standard input; its output streams are
    discarded and a non-zero exit status is reported as False rather
    than raised (``check=False``).
    """
    command = [sys.executable, IS_PYTHON_SCRIPT]
    completed = await trio.run_process(
        command,
        stdin=data,
        stdout=subprocess.DEVNULL,
        stderr=subprocess.DEVNULL,
        check=False,
    )
    return completed.returncode == 0
|
|
File without changes
|