shrinkray 25.12.27.1__py3-none-any.whl → 25.12.27.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- shrinkray/passes/bytes.py +8 -7
- shrinkray/passes/definitions.py +3 -67
- shrinkray/passes/genericlanguages.py +14 -10
- shrinkray/passes/json.py +2 -2
- shrinkray/passes/sat.py +2 -7
- shrinkray/problem.py +257 -11
- shrinkray/reducer.py +9 -2
- shrinkray/state.py +8 -15
- {shrinkray-25.12.27.1.dist-info → shrinkray-25.12.27.2.dist-info}/METADATA +1 -28
- {shrinkray-25.12.27.1.dist-info → shrinkray-25.12.27.2.dist-info}/RECORD +14 -14
- {shrinkray-25.12.27.1.dist-info → shrinkray-25.12.27.2.dist-info}/WHEEL +0 -0
- {shrinkray-25.12.27.1.dist-info → shrinkray-25.12.27.2.dist-info}/entry_points.txt +0 -0
- {shrinkray-25.12.27.1.dist-info → shrinkray-25.12.27.2.dist-info}/licenses/LICENSE +0 -0
- {shrinkray-25.12.27.1.dist-info → shrinkray-25.12.27.2.dist-info}/top_level.txt +0 -0
shrinkray/passes/bytes.py
CHANGED
|
@@ -24,8 +24,8 @@ from collections.abc import Sequence
|
|
|
24
24
|
|
|
25
25
|
from attrs import define
|
|
26
26
|
|
|
27
|
-
from shrinkray.passes.definitions import Format, ReductionProblem
|
|
28
27
|
from shrinkray.passes.patching import Cuts, Patches, apply_patches
|
|
28
|
+
from shrinkray.problem import Format, ReductionProblem
|
|
29
29
|
|
|
30
30
|
|
|
31
31
|
@define(frozen=True)
|
|
@@ -739,12 +739,13 @@ async def line_sorter(problem: ReductionProblem[bytes]):
|
|
|
739
739
|
while i < len(lines):
|
|
740
740
|
j = i
|
|
741
741
|
while j > 0:
|
|
742
|
-
|
|
743
|
-
|
|
744
|
-
|
|
745
|
-
|
|
746
|
-
|
|
747
|
-
|
|
742
|
+
attempt = list(lines)
|
|
743
|
+
attempt[j - 1], attempt[j] = attempt[j], attempt[j - 1]
|
|
744
|
+
new_test_case = b"\n".join(attempt)
|
|
745
|
+
if problem.sort_key(new_test_case) < problem.sort_key(
|
|
746
|
+
problem.current_test_case
|
|
747
|
+
):
|
|
748
|
+
if not await problem.is_interesting(new_test_case):
|
|
748
749
|
break
|
|
749
750
|
else:
|
|
750
751
|
j -= 1
|
shrinkray/passes/definitions.py
CHANGED
|
@@ -4,20 +4,20 @@ This module defines the core type aliases and abstractions for reduction:
|
|
|
4
4
|
|
|
5
5
|
- ReductionPass[T]: A function that attempts to reduce a test case
|
|
6
6
|
- ReductionPump[T]: A function that may temporarily increase test case size
|
|
7
|
-
- Format[S, T]: A bidirectional transformation between types
|
|
8
7
|
- compose(): Combines a Format with a pass to work on a different type
|
|
9
8
|
|
|
10
9
|
These abstractions enable format-agnostic reduction: the same pass
|
|
11
10
|
(e.g., "delete duplicate elements") can work on bytes, lines, tokens,
|
|
12
11
|
JSON arrays, or any other sequence-like type.
|
|
12
|
+
|
|
13
|
+
Note: Format, ParseError, and DumpError are defined in shrinkray.problem.
|
|
13
14
|
"""
|
|
14
15
|
|
|
15
|
-
from abc import ABC, abstractmethod
|
|
16
16
|
from collections.abc import Awaitable, Callable
|
|
17
17
|
from functools import wraps
|
|
18
18
|
from typing import TypeVar
|
|
19
19
|
|
|
20
|
-
from shrinkray.problem import ReductionProblem
|
|
20
|
+
from shrinkray.problem import Format, ParseError, ReductionProblem
|
|
21
21
|
|
|
22
22
|
|
|
23
23
|
S = TypeVar("S")
|
|
@@ -36,70 +36,6 @@ ReductionPass = Callable[[ReductionProblem[T]], Awaitable[None]]
|
|
|
36
36
|
ReductionPump = Callable[[ReductionProblem[T]], Awaitable[T]]
|
|
37
37
|
|
|
38
38
|
|
|
39
|
-
class ParseError(Exception):
|
|
40
|
-
"""Raised when a Format cannot parse its input."""
|
|
41
|
-
|
|
42
|
-
pass
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
class DumpError(Exception):
|
|
46
|
-
"""Raised when a Format cannot serialize its output.
|
|
47
|
-
|
|
48
|
-
This occurs because not all internal representations map to valid
|
|
49
|
-
output in the target format. For example, a reduction might create
|
|
50
|
-
an invalid AST structure that cannot be converted back to source code.
|
|
51
|
-
"""
|
|
52
|
-
|
|
53
|
-
pass
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
class Format[S, T](ABC):
|
|
57
|
-
"""A bidirectional transformation between two types.
|
|
58
|
-
|
|
59
|
-
Formats enable format-agnostic passes by abstracting the
|
|
60
|
-
parse/serialize cycle. For example:
|
|
61
|
-
|
|
62
|
-
- Split(b"\\n"): bytes <-> list[bytes] (lines)
|
|
63
|
-
- Tokenize(): bytes <-> list[bytes] (tokens)
|
|
64
|
-
- JSON: bytes <-> Any (Python objects)
|
|
65
|
-
- DimacsCNF: bytes <-> list[list[int]] (SAT clauses)
|
|
66
|
-
|
|
67
|
-
A Format must satisfy the round-trip property:
|
|
68
|
-
dumps(parse(x)) should be equivalent to x
|
|
69
|
-
(possibly with normalization)
|
|
70
|
-
|
|
71
|
-
Example usage:
|
|
72
|
-
# Delete duplicate lines
|
|
73
|
-
compose(Split(b"\\n"), delete_duplicates)
|
|
74
|
-
|
|
75
|
-
# Reduce integer literals in source code
|
|
76
|
-
compose(IntegerFormat(), reduce_integer)
|
|
77
|
-
"""
|
|
78
|
-
|
|
79
|
-
@property
|
|
80
|
-
def name(self) -> str:
|
|
81
|
-
"""Human-readable name for this format, used in pass names."""
|
|
82
|
-
return repr(self)
|
|
83
|
-
|
|
84
|
-
@abstractmethod
|
|
85
|
-
def parse(self, input: S) -> T:
|
|
86
|
-
"""Parse input into the target type. Raises ParseError on failure."""
|
|
87
|
-
...
|
|
88
|
-
|
|
89
|
-
def is_valid(self, input: S) -> bool:
|
|
90
|
-
"""Check if input can be parsed by this format."""
|
|
91
|
-
try:
|
|
92
|
-
self.parse(input)
|
|
93
|
-
return True
|
|
94
|
-
except ParseError:
|
|
95
|
-
return False
|
|
96
|
-
|
|
97
|
-
@abstractmethod
|
|
98
|
-
def dumps(self, input: T) -> S:
|
|
99
|
-
"""Serialize the target type back to the source type."""
|
|
100
|
-
...
|
|
101
|
-
|
|
102
|
-
|
|
103
39
|
def compose(format: Format[S, T], reduction_pass: ReductionPass[T]) -> ReductionPass[S]:
|
|
104
40
|
"""Wrap a reduction pass to work through a Format transformation.
|
|
105
41
|
|
|
@@ -3,7 +3,7 @@ Module of reduction passes designed for "things that look like programming langu
|
|
|
3
3
|
"""
|
|
4
4
|
|
|
5
5
|
import re
|
|
6
|
-
from collections.abc import Callable
|
|
6
|
+
from collections.abc import Callable
|
|
7
7
|
from functools import wraps
|
|
8
8
|
from string import ascii_lowercase, ascii_uppercase
|
|
9
9
|
from typing import AnyStr
|
|
@@ -12,9 +12,15 @@ import trio
|
|
|
12
12
|
from attr import define
|
|
13
13
|
|
|
14
14
|
from shrinkray.passes.bytes import ByteReplacement, delete_intervals
|
|
15
|
-
from shrinkray.passes.definitions import
|
|
15
|
+
from shrinkray.passes.definitions import ReductionPass
|
|
16
16
|
from shrinkray.passes.patching import PatchApplier, Patches, apply_patches
|
|
17
|
-
from shrinkray.problem import
|
|
17
|
+
from shrinkray.problem import (
|
|
18
|
+
BasicReductionProblem,
|
|
19
|
+
Format,
|
|
20
|
+
ParseError,
|
|
21
|
+
ReductionProblem,
|
|
22
|
+
sort_key_for_initial,
|
|
23
|
+
)
|
|
18
24
|
from shrinkray.work import NotFound
|
|
19
25
|
|
|
20
26
|
|
|
@@ -240,10 +246,6 @@ async def simplify_brackets(problem: ReductionProblem[bytes]) -> None:
|
|
|
240
246
|
IDENTIFIER = re.compile(rb"(\b[A-Za-z][A-Za-z0-9_]*\b)|([0-9]+)")
|
|
241
247
|
|
|
242
248
|
|
|
243
|
-
def shortlex[T: Sized](s: T) -> tuple[int, T]:
|
|
244
|
-
return (len(s), s)
|
|
245
|
-
|
|
246
|
-
|
|
247
249
|
async def normalize_identifiers(problem: ReductionProblem[bytes]) -> None:
|
|
248
250
|
"""Replace identifiers with shorter alternatives.
|
|
249
251
|
|
|
@@ -261,8 +263,10 @@ async def normalize_identifiers(problem: ReductionProblem[bytes]) -> None:
|
|
|
261
263
|
replacements.add(c)
|
|
262
264
|
break
|
|
263
265
|
|
|
264
|
-
|
|
265
|
-
|
|
266
|
+
sort_key = sort_key_for_initial(problem.current_test_case)
|
|
267
|
+
|
|
268
|
+
replacements = sorted(replacements, key=sort_key)
|
|
269
|
+
targets = sorted(identifiers, key=sort_key, reverse=True)
|
|
266
270
|
|
|
267
271
|
# TODO: This could use better parallelisation.
|
|
268
272
|
for t in targets:
|
|
@@ -272,7 +276,7 @@ async def normalize_identifiers(problem: ReductionProblem[bytes]) -> None:
|
|
|
272
276
|
continue
|
|
273
277
|
|
|
274
278
|
async def can_replace(r):
|
|
275
|
-
if
|
|
279
|
+
if sort_key(r) >= sort_key(t):
|
|
276
280
|
return False
|
|
277
281
|
attempt = pattern.sub(r, source)
|
|
278
282
|
assert attempt != source
|
shrinkray/passes/json.py
CHANGED
|
@@ -4,9 +4,9 @@ from typing import Any
|
|
|
4
4
|
|
|
5
5
|
from attrs import define
|
|
6
6
|
|
|
7
|
-
from shrinkray.passes.definitions import
|
|
7
|
+
from shrinkray.passes.definitions import ReductionPass
|
|
8
8
|
from shrinkray.passes.patching import Patches, apply_patches
|
|
9
|
-
from shrinkray.problem import ReductionProblem
|
|
9
|
+
from shrinkray.problem import Format, ParseError, ReductionProblem
|
|
10
10
|
|
|
11
11
|
|
|
12
12
|
def is_json(s: bytes) -> bool:
|
shrinkray/passes/sat.py
CHANGED
|
@@ -1,15 +1,10 @@
|
|
|
1
1
|
from collections import Counter, defaultdict
|
|
2
2
|
from collections.abc import Callable, Iterable, Iterator
|
|
3
3
|
|
|
4
|
-
from shrinkray.passes.definitions import
|
|
5
|
-
DumpError,
|
|
6
|
-
Format,
|
|
7
|
-
ParseError,
|
|
8
|
-
ReductionPass,
|
|
9
|
-
)
|
|
4
|
+
from shrinkray.passes.definitions import ReductionPass
|
|
10
5
|
from shrinkray.passes.patching import Conflict, SetPatches, apply_patches
|
|
11
6
|
from shrinkray.passes.sequences import delete_elements
|
|
12
|
-
from shrinkray.problem import ReductionProblem
|
|
7
|
+
from shrinkray.problem import DumpError, Format, ParseError, ReductionProblem
|
|
13
8
|
|
|
14
9
|
|
|
15
10
|
Clause = list[int]
|
shrinkray/problem.py
CHANGED
|
@@ -12,12 +12,13 @@ the details of caching, parallelism, and state management.
|
|
|
12
12
|
"""
|
|
13
13
|
|
|
14
14
|
import hashlib
|
|
15
|
+
import string
|
|
15
16
|
import time
|
|
16
17
|
from abc import ABC, abstractmethod
|
|
17
18
|
from collections.abc import Awaitable, Callable, Sized
|
|
18
19
|
from datetime import timedelta
|
|
20
|
+
from functools import total_ordering
|
|
19
21
|
from typing import (
|
|
20
|
-
TYPE_CHECKING,
|
|
21
22
|
Any,
|
|
22
23
|
Protocol,
|
|
23
24
|
TypeVar,
|
|
@@ -29,12 +30,10 @@ import trio
|
|
|
29
30
|
from attrs import define
|
|
30
31
|
from humanize import naturalsize, precisedelta
|
|
31
32
|
|
|
33
|
+
from shrinkray.formatting import try_decode
|
|
32
34
|
from shrinkray.work import WorkContext
|
|
33
35
|
|
|
34
36
|
|
|
35
|
-
if TYPE_CHECKING:
|
|
36
|
-
from shrinkray.passes.definitions import Format
|
|
37
|
-
|
|
38
37
|
S = TypeVar("S")
|
|
39
38
|
T = TypeVar("T")
|
|
40
39
|
|
|
@@ -71,9 +70,196 @@ def shortlex[SizedT: Sized](value: SizedT) -> tuple[int, SizedT]:
|
|
|
71
70
|
return (len(value), value)
|
|
72
71
|
|
|
73
72
|
|
|
74
|
-
|
|
75
|
-
|
|
73
|
+
@total_ordering
|
|
74
|
+
class LazyChainedSortKey:
|
|
75
|
+
"""A comparison key that lazily evaluates a chain of comparison functions.
|
|
76
|
+
|
|
77
|
+
This class provides an ordering that compares values by applying a sequence
|
|
78
|
+
of functions in order. The first function that produces different values
|
|
79
|
+
for two inputs determines the ordering. If all functions return equal
|
|
80
|
+
values, the inputs are considered equal.
|
|
81
|
+
|
|
82
|
+
This is used to implement the natural ordering for strings, which compares
|
|
83
|
+
by length, then average squared line length, then number of lines, etc.
|
|
84
|
+
|
|
85
|
+
The "lazy" aspect is that comparison functions are only evaluated until
|
|
86
|
+
one returns different values, avoiding unnecessary computation.
|
|
87
|
+
"""
|
|
88
|
+
|
|
89
|
+
def __init__(self, functions: list[Callable[[T], Any]], value: T):
|
|
90
|
+
self.functions = functions
|
|
91
|
+
self.value = value
|
|
92
|
+
|
|
93
|
+
def __eq__(self, other):
|
|
94
|
+
if not isinstance(other, LazyChainedSortKey):
|
|
95
|
+
return NotImplemented
|
|
96
|
+
assert len(self.functions) == len(other.functions)
|
|
97
|
+
return self.value == other.value
|
|
98
|
+
|
|
99
|
+
def __lt__(self, other):
|
|
100
|
+
if self == other:
|
|
101
|
+
return False
|
|
102
|
+
if not isinstance(other, LazyChainedSortKey):
|
|
103
|
+
return NotImplemented
|
|
104
|
+
for f in self.functions:
|
|
105
|
+
self_key = f(self.value)
|
|
106
|
+
other_key = f(other.value)
|
|
107
|
+
if self_key < other_key:
|
|
108
|
+
return True
|
|
109
|
+
elif self_key > other_key:
|
|
110
|
+
return False
|
|
111
|
+
# All comparison functions returned equal values for different inputs.
|
|
112
|
+
# This shouldn't happen with the current functions (natural_string_lex
|
|
113
|
+
# compares character-by-character) but if it does, neither is less.
|
|
114
|
+
return False
|
|
115
|
+
|
|
116
|
+
|
|
117
|
+
# Characters with an explicit rank in the natural ordering, lowest first:
# whitespace, then digits, then lowercase letters, then uppercase letters.
# A character that does not appear here ranks after all of these, by ord().
NATURAL_CHARACTER_ORDER = "".join(
    (string.whitespace, string.digits, string.ascii_lowercase, string.ascii_uppercase)
)
NATURAL_CHARACTER_ORDER_INDEX = dict(
    zip(NATURAL_CHARACTER_ORDER, range(len(NATURAL_CHARACTER_ORDER)))
)


def character_index(c: str) -> int:
    """Return the rank of a single character in the natural ordering.

    A character listed in NATURAL_CHARACTER_ORDER ranks at its position in
    that string. Any other character ranks at the length of that string plus
    its code point, which places it after every listed character.
    """
    rank = NATURAL_CHARACTER_ORDER_INDEX.get(c)
    if rank is not None:
        return rank
    return len(NATURAL_CHARACTER_ORDER) + ord(c)
|
|
133
|
+
|
|
134
|
+
|
|
135
|
+
def natural_string_lex(s: str) -> list[int]:
    """Map each character of a string to its natural-ordering rank.

    The returned list holds character_index(ch) for every character of ``s``
    in order, so comparing two such lists compares the strings
    lexicographically under the natural character ordering.
    """
    return [character_index(ch) for ch in s]
|
|
142
|
+
|
|
143
|
+
|
|
144
|
+
def _average_squared_line_length(s: str) -> float:
    """Return sum(len(line)**2) / count(lines)**2 over the lines of ``s``."""
    # str.split always yields at least one piece (even for an empty string),
    # so the division below cannot be by zero.
    lines = s.split("\n")
    return sum(len(line) ** 2 for line in lines) / len(lines) ** 2


def _line_count(s: str) -> int:
    """Return the number of lines in ``s`` as counted by str.splitlines."""
    return len(s.splitlines())


def _line_lengths(s: str) -> list[int]:
    """Return the length of each line of ``s`` as split by str.splitlines."""
    return [len(line) for line in s.splitlines()]


# The chain of comparison functions used for natural string ordering.
# Each function is tried in sequence; the first that differs determines order.
#
# 1. Total length - shorter strings are always preferred
# 2. Average squared line length - penalizes very long lines, preferring balanced code
#    Formula: sum(len(line)²) / count(lines)²  (lines split on "\n")
# 3. Number of lines - fewer lines is better (after accounting for balance)
# 4. List of line lengths - lexicographically compare line length sequences
# 5. Natural character order - whitespace < digits < lowercase < uppercase
NATURAL_ORDERING_FUNCTIONS: list[Callable[[str], Any]] = [
    len,
    _average_squared_line_length,
    _line_count,
    _line_lengths,
    natural_string_lex,
]
|
|
160
|
+
|
|
161
|
+
|
|
162
|
+
def natural_key(s: str) -> LazyChainedSortKey:
    """Wrap a string in a key that compares by natural string ordering.

    The key applies the criteria listed in NATURAL_ORDERING_FUNCTIONS one
    after another, so the first criterion on which two strings differ
    decides which one is considered smaller (more reduced).
    """
    return LazyChainedSortKey(NATURAL_ORDERING_FUNCTIONS, s)
|
|
172
|
+
|
|
173
|
+
|
|
174
|
+
def sort_key_for_initial(initial: Any) -> Callable[[Any], Any]:
    """Build the sort key function to use for test cases shaped like ``initial``.

    The type of ``initial`` selects the ordering:

    bytes:
        When try_decode reports no encoding, plain shortlex is used.
        Otherwise each candidate is decoded with the reported encoding and
        ordered by natural_key. A candidate that fails to decode is ordered
        by shortlex and sorts after every candidate that does decode.

    dict:
        Candidates are ordered by the total length of their values, then by
        their number of keys, then key by key. The keys of ``initial`` are
        visited from the shortlex-largest value down, a candidate missing a
        key sorts before one that has it, and values under the same key are
        ordered by the sort key built from ``initial``'s value for that key.

    anything else:
        Natural ordering of repr(). The main app does not take this branch,
        but tests call this function with other types.

    Returns:
        A one-argument function suitable as a ``key=`` for sorting or for
        comparing two test cases.
    """
    if isinstance(initial, bytes):
        encoding, _ = try_decode(initial)
        if encoding is None:
            return shortlex

        def natural_for_encoding(b: bytes) -> Any:
            try:
                text = b.decode(encoding)
            except UnicodeDecodeError:
                # The leading 1 ranks undecodable candidates after decodable ones.
                return (1, shortlex(b))
            return (0, natural_key(text))

        return natural_for_encoding

    if isinstance(initial, dict):
        keys = sorted(initial, key=lambda k: shortlex(initial[k]), reverse=True)
        natural_keys = {k: sort_key_for_initial(v) for k, v in initial.items()}

        def dict_total_size(s):
            return sum(map(len, s.values()))

        def key_sort_key(k):
            def f(x):
                try:
                    v = x[k]
                except KeyError:
                    # An absent key ranks below any present value.
                    return (0,)
                return (1, natural_keys[k](v))

            return f

        functions = [dict_total_size, len, *map(key_sort_key, keys)]

        def dict_sort_key(v):
            return LazyChainedSortKey(functions, v)

        return dict_sort_key

    # We don't use this branch in the main app, but this
    # function is also used in tests.
    def fallback_sort_key(s):
        return natural_key(repr(s))

    return fallback_sort_key
|
|
244
|
+
|
|
245
|
+
|
|
246
|
+
def default_sort_key(value: Any) -> Any:
    """Return a comparison key for ``value`` chosen purely from its type.

    Unlike sort_key_for_initial, this does not inspect an initial test case
    to decide on an ordering:

    - bytes: shortlex of the value itself
    - str: natural_key of the value
    - anything else: shortlex of repr(value)

    Note: the result is really "something comparable", but Python has no
    built-in protocol to express that, hence the ``Any`` return type.
    """
    if isinstance(value, str):
        return natural_key(value)
    comparable = value if isinstance(value, bytes) else repr(value)
    return shortlex(comparable)
|
|
79
265
|
|
|
@@ -85,6 +271,70 @@ def default_display(value: Any) -> str:
|
|
|
85
271
|
return f"value of size {len(value)}"
|
|
86
272
|
|
|
87
273
|
|
|
274
|
+
class ParseError(Exception):
    """Signals that a Format was unable to parse the input it was given."""
|
|
278
|
+
|
|
279
|
+
|
|
280
|
+
class DumpError(Exception):
    """Signals that a Format was unable to serialize a value.

    Not every internal representation corresponds to valid output in the
    target format. A reduction can, for instance, produce an AST structure
    that is invalid and therefore cannot be turned back into source code.
    """
|
|
289
|
+
|
|
290
|
+
|
|
291
|
+
class Format[S, T](ABC):
|
|
292
|
+
"""A bidirectional transformation between two types.
|
|
293
|
+
|
|
294
|
+
Formats enable format-agnostic passes by abstracting the
|
|
295
|
+
parse/serialize cycle. For example:
|
|
296
|
+
|
|
297
|
+
- Split(b"\\n"): bytes <-> list[bytes] (lines)
|
|
298
|
+
- Tokenize(): bytes <-> list[bytes] (tokens)
|
|
299
|
+
- JSON: bytes <-> Any (Python objects)
|
|
300
|
+
- DimacsCNF: bytes <-> list[list[int]] (SAT clauses)
|
|
301
|
+
|
|
302
|
+
A Format must satisfy the round-trip property:
|
|
303
|
+
dumps(parse(x)) should be equivalent to x
|
|
304
|
+
(possibly with normalization)
|
|
305
|
+
|
|
306
|
+
Example usage:
|
|
307
|
+
# Delete duplicate lines
|
|
308
|
+
compose(Split(b"\\n"), delete_duplicates)
|
|
309
|
+
|
|
310
|
+
# Reduce integer literals in source code
|
|
311
|
+
compose(IntegerFormat(), reduce_integer)
|
|
312
|
+
"""
|
|
313
|
+
|
|
314
|
+
@property
|
|
315
|
+
def name(self) -> str:
|
|
316
|
+
"""Human-readable name for this format, used in pass names."""
|
|
317
|
+
return repr(self)
|
|
318
|
+
|
|
319
|
+
@abstractmethod
|
|
320
|
+
def parse(self, input: S) -> T:
|
|
321
|
+
"""Parse input into the target type. Raises ParseError on failure."""
|
|
322
|
+
...
|
|
323
|
+
|
|
324
|
+
def is_valid(self, input: S) -> bool:
|
|
325
|
+
"""Check if input can be parsed by this format."""
|
|
326
|
+
try:
|
|
327
|
+
self.parse(input)
|
|
328
|
+
return True
|
|
329
|
+
except ParseError:
|
|
330
|
+
return False
|
|
331
|
+
|
|
332
|
+
@abstractmethod
|
|
333
|
+
def dumps(self, input: T) -> S:
|
|
334
|
+
"""Serialize the target type back to the source type."""
|
|
335
|
+
...
|
|
336
|
+
|
|
337
|
+
|
|
88
338
|
def default_size(value: Any) -> int:
|
|
89
339
|
try:
|
|
90
340
|
return len(value)
|
|
@@ -182,9 +432,7 @@ class ReductionProblem[T](ABC):
|
|
|
182
432
|
# Cache of View objects for each Format, to avoid re-parsing
|
|
183
433
|
self.__view_cache: dict[Any, ReductionProblem[Any]] = {}
|
|
184
434
|
|
|
185
|
-
def view(
|
|
186
|
-
self, format: "Format[T, S] | type[Format[T, S]]"
|
|
187
|
-
) -> "ReductionProblem[S]":
|
|
435
|
+
def view(self, format: Format[T, S] | type[Format[T, S]]) -> "ReductionProblem[S]":
|
|
188
436
|
"""Create a view of this problem through a Format.
|
|
189
437
|
|
|
190
438
|
A View wraps this problem, parsing the current test case through
|
|
@@ -481,8 +729,6 @@ class View[S, T](ReductionProblem[T]):
|
|
|
481
729
|
return self.__current
|
|
482
730
|
|
|
483
731
|
async def is_interesting(self, test_case: T) -> bool:
|
|
484
|
-
from shrinkray.passes.definitions import DumpError
|
|
485
|
-
|
|
486
732
|
try:
|
|
487
733
|
return await self.__problem.is_interesting(self.__dump(test_case))
|
|
488
734
|
except DumpError:
|
shrinkray/reducer.py
CHANGED
|
@@ -49,7 +49,12 @@ from shrinkray.passes.patching import PatchApplier, Patches
|
|
|
49
49
|
from shrinkray.passes.python import PYTHON_PASSES, is_python
|
|
50
50
|
from shrinkray.passes.sat import SAT_PASSES, DimacsCNF
|
|
51
51
|
from shrinkray.passes.sequences import block_deletion, delete_duplicates
|
|
52
|
-
from shrinkray.problem import
|
|
52
|
+
from shrinkray.problem import (
|
|
53
|
+
ReductionProblem,
|
|
54
|
+
ReductionStats,
|
|
55
|
+
shortlex,
|
|
56
|
+
sort_key_for_initial,
|
|
57
|
+
)
|
|
53
58
|
|
|
54
59
|
|
|
55
60
|
@define
|
|
@@ -531,6 +536,8 @@ class KeyProblem(ReductionProblem[bytes]):
|
|
|
531
536
|
self.base_problem = base_problem
|
|
532
537
|
self.applier = applier
|
|
533
538
|
self.key = key
|
|
539
|
+
# Use the appropriate sort key for this value (natural for text, shortlex for binary)
|
|
540
|
+
self._sort_key_fn = sort_key_for_initial(self.current_test_case)
|
|
534
541
|
|
|
535
542
|
@property
|
|
536
543
|
def current_test_case(self) -> bytes:
|
|
@@ -547,7 +554,7 @@ class KeyProblem(ReductionProblem[bytes]):
|
|
|
547
554
|
return len(test_case)
|
|
548
555
|
|
|
549
556
|
def sort_key(self, test_case: bytes) -> Any:
|
|
550
|
-
return
|
|
557
|
+
return self._sort_key_fn(test_case)
|
|
551
558
|
|
|
552
559
|
def display(self, value: bytes) -> str:
|
|
553
560
|
return repr(value)
|
shrinkray/state.py
CHANGED
|
@@ -10,7 +10,7 @@ import time
|
|
|
10
10
|
from abc import ABC, abstractmethod
|
|
11
11
|
from datetime import timedelta
|
|
12
12
|
from tempfile import TemporaryDirectory
|
|
13
|
-
from typing import Any
|
|
13
|
+
from typing import Any, TypeVar
|
|
14
14
|
|
|
15
15
|
import humanize
|
|
16
16
|
import trio
|
|
@@ -21,12 +21,15 @@ from shrinkray.problem import (
|
|
|
21
21
|
BasicReductionProblem,
|
|
22
22
|
InvalidInitialExample,
|
|
23
23
|
ReductionProblem,
|
|
24
|
-
|
|
24
|
+
sort_key_for_initial,
|
|
25
25
|
)
|
|
26
26
|
from shrinkray.reducer import DirectoryShrinkRay, Reducer, ShrinkRay
|
|
27
27
|
from shrinkray.work import Volume, WorkContext
|
|
28
28
|
|
|
29
29
|
|
|
30
|
+
T = TypeVar("T")
|
|
31
|
+
|
|
32
|
+
|
|
30
33
|
class TimeoutExceededOnInitial(InvalidInitialExample):
|
|
31
34
|
def __init__(self, runtime: float, timeout: float) -> None:
|
|
32
35
|
self.runtime = runtime
|
|
@@ -310,6 +313,7 @@ class ShrinkRayState[TestCase](ABC):
|
|
|
310
313
|
is_interesting=self.is_interesting,
|
|
311
314
|
initial=self.initial,
|
|
312
315
|
work=work,
|
|
316
|
+
sort_key=sort_key_for_initial(self.initial),
|
|
313
317
|
**self.extra_problem_kwargs,
|
|
314
318
|
)
|
|
315
319
|
|
|
@@ -593,20 +597,9 @@ class ShrinkRayDirectoryState(ShrinkRayState[dict[str, bytes]]):
|
|
|
593
597
|
def setup_formatter(self): ...
|
|
594
598
|
|
|
595
599
|
@property
|
|
596
|
-
def extra_problem_kwargs(self):
|
|
597
|
-
def dict_size(test_case: dict[str, bytes]) -> int:
|
|
598
|
-
return sum(len(v) for v in test_case.values())
|
|
599
|
-
|
|
600
|
-
def dict_sort_key(test_case: dict[str, bytes]) -> Any:
|
|
601
|
-
return (
|
|
602
|
-
len(test_case),
|
|
603
|
-
dict_size(test_case),
|
|
604
|
-
sorted((k, shortlex(v)) for k, v in test_case.items()),
|
|
605
|
-
)
|
|
606
|
-
|
|
600
|
+
def extra_problem_kwargs(self) -> dict[str, Any]:
|
|
607
601
|
return {
|
|
608
|
-
"
|
|
609
|
-
"size": dict_size,
|
|
602
|
+
"size": lambda tc: sum(len(v) for v in tc.values()),
|
|
610
603
|
}
|
|
611
604
|
|
|
612
605
|
def new_reducer(
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: shrinkray
|
|
3
|
-
Version: 25.12.27.
|
|
3
|
+
Version: 25.12.27.2
|
|
4
4
|
Summary: Shrink Ray
|
|
5
5
|
Author-email: "David R. MacIver" <david@drmaciver.com>
|
|
6
6
|
License: MIT
|
|
@@ -159,30 +159,3 @@ not to handle well, but it's easy to extend with additional transformations.
|
|
|
159
159
|
It is also fairly easy to add support for new formats as needed.
|
|
160
160
|
|
|
161
161
|
If you run into a test case and interestingness test that you care about that shrink ray handles badly please let me know and I'll likely see about improving its handling of that format.
|
|
162
|
-
|
|
163
|
-
## Parallelism
|
|
164
|
-
|
|
165
|
-
You can control the number of parallel tasks shrinkray will run with the `--parallelism` flag. By default this will be the number of CPU cores you have available
|
|
166
|
-
|
|
167
|
-
Shrink Ray is designed to be able to run heavily in parallel, with a basic heuristic of aiming to be embarrassingly parallel when making no progress, mostly sequential when making progress, and smoothly scaling in between the two. It mostly succeeds at this.
|
|
168
|
-
|
|
169
|
-
Currently the bottleneck on scaling to a very large number of cores is how fast the controlling Python program can generate variant test cases to try and pass them to the interestingness test. This isn't well optimised at present and I don't currently have good benchmarks for it, but I'd expect you to be able to get linear speedups on most workflows while running 10-20 test cases in parallel, and to start to struggle past that.
|
|
170
|
-
|
|
171
|
-
This also depends on the performance of the interestingness test - the slower your test is to run, the more you'll be able to scale linearly with the number of cores available.
|
|
172
|
-
|
|
173
|
-
I'm quite interested in getting this part to scale well, so please let me know if you find examples where it doesn't seem to work.
|
|
174
|
-
|
|
175
|
-
## Bug Reports
|
|
176
|
-
|
|
177
|
-
Shrink Ray is still pretty new and under-tested software, so it definitely has bugs. If you run into any, [please file an issue](https://github.com/DRMacIver/shrinkray/issues).
|
|
178
|
-
|
|
179
|
-
As well as obvious bugs (crashes, etc) I'm also very interested in hearing about usability issues and cases where the reduced test case isn't very good.
|
|
180
|
-
|
|
181
|
-
Requests for new features, new supported formats, etc. also welcome although I'm less likely to jump right on them.
|
|
182
|
-
|
|
183
|
-
## Sponsorship
|
|
184
|
-
|
|
185
|
-
Shrink Ray is something of a labour of love - I wanted to have a tool that actually put into practice many of my ideas about test-case reduction, as I think the previous state of the art was well behind where I'd like it to be.
|
|
186
|
-
|
|
187
|
-
That being said, it is first and foremost designed to be a useful tool for practical engineering problems.
|
|
188
|
-
If you find it useful as such, please [consider sponsoring my development of it](https://github.com/sponsors/DRMacIver).
|
|
@@ -3,31 +3,31 @@ shrinkray/__main__.py,sha256=K3_s96Tyoi7SxNOyoZXkfiEoSxVBL__TJ3o2Cefadmg,11093
|
|
|
3
3
|
shrinkray/cli.py,sha256=1-qjaIchyCDd-YCdGWtK7q9j9qr6uX6AqtwW8m5QCQg,1697
|
|
4
4
|
shrinkray/display.py,sha256=WYN05uqmUVpZhwi2pxr1U-wLHWZ9KiL0RUlTCBJ1N3E,2430
|
|
5
5
|
shrinkray/formatting.py,sha256=tXCGnhJn-WJGpHMaLHRCAXK8aKJBbrOdiW9QGERrQEk,3121
|
|
6
|
-
shrinkray/problem.py,sha256=
|
|
6
|
+
shrinkray/problem.py,sha256=_edENYk8OC5o_2ng0WZrhIfilhlY5IuOrqt0qWBZAuM,25979
|
|
7
7
|
shrinkray/process.py,sha256=-eP8h5X0ESbkcTic8FFEzkd4-vwaZ0YI5tLxUR25L8U,1599
|
|
8
8
|
shrinkray/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
9
|
-
shrinkray/reducer.py,sha256=
|
|
10
|
-
shrinkray/state.py,sha256=
|
|
9
|
+
shrinkray/reducer.py,sha256=t2BBjq-EZ0qBbsez4ep0tFYCkDT8RwKYnjJ2fRsXubg,20096
|
|
10
|
+
shrinkray/state.py,sha256=owyDNiSeHKnCsgOQczSz796KEANem-4YWTmbmZBRE0w,24040
|
|
11
11
|
shrinkray/tui.py,sha256=3RskLo6JvKdUQIHi40R5ka-F_1GkBXyA_d_SkYbLlCw,31601
|
|
12
12
|
shrinkray/ui.py,sha256=xuDUwU-MM3AetvwUB7bfzav0P_drUsBrKFPhON_Nr-k,2251
|
|
13
13
|
shrinkray/work.py,sha256=GEZ14Kk3bvwUxAnACvY-wom2lVWaGrELMNxrDjv03dk,8110
|
|
14
14
|
shrinkray/passes/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
15
|
-
shrinkray/passes/bytes.py,sha256=
|
|
15
|
+
shrinkray/passes/bytes.py,sha256=U1sNAuqUlsaLpRSJuS9X4H7S2jOiilTUIIE9-WjSd4k,23967
|
|
16
16
|
shrinkray/passes/clangdelta.py,sha256=t9EQ_kc159HRs48JwB5JvlJCsiCscrZgf2nhHCZRZX0,8419
|
|
17
|
-
shrinkray/passes/definitions.py,sha256=
|
|
18
|
-
shrinkray/passes/genericlanguages.py,sha256=
|
|
19
|
-
shrinkray/passes/json.py,sha256=
|
|
17
|
+
shrinkray/passes/definitions.py,sha256=TDDPimp7DE60xgidAE11npt2KicQAEi0UydIzjS9VHw,2469
|
|
18
|
+
shrinkray/passes/genericlanguages.py,sha256=ZqTfEHUTRbkm6PiTkBc_y3Q5Q0MJAnibNEiuKDOhiS0,10432
|
|
19
|
+
shrinkray/passes/json.py,sha256=FydtjpVD3lMjp9OEwv5mu8CAz96ofGxIZOzt9o6eLUA,2586
|
|
20
20
|
shrinkray/passes/patching.py,sha256=1uOTir3IbywKmsg6IIhSnxHFovZTdUCS-8PSwzgza00,8936
|
|
21
21
|
shrinkray/passes/python.py,sha256=3WN1lZTf5oVL8FCTGomhrCuE04wIX9ocKcmFV86NMZA,6875
|
|
22
|
-
shrinkray/passes/sat.py,sha256=
|
|
22
|
+
shrinkray/passes/sat.py,sha256=OboY6jsKf6lph3pAFh535plvhNOVzEF8HJ66WEqsNm4,19483
|
|
23
23
|
shrinkray/passes/sequences.py,sha256=jCK1fWBxCz79u7JWSps9wf7Yru7W_FAsJwdgg--CLxU,3040
|
|
24
24
|
shrinkray/subprocess/__init__.py,sha256=FyV2y05uwQ1RTZGwREI0aAVaLX1jiwRcWsdsksFmdbM,451
|
|
25
25
|
shrinkray/subprocess/client.py,sha256=erqnPglPO0YNdwEKlmhB3yDo6Mfc00Lxh4T85lZhsDo,9341
|
|
26
26
|
shrinkray/subprocess/protocol.py,sha256=LuHl0IkKpDzYhAGZz_EiTHNqDNq_v1ozg5aUSl7UzE4,6203
|
|
27
27
|
shrinkray/subprocess/worker.py,sha256=ke-9DYFH117EpJEntkucTrn7ep7pygzmV-VXkRe1o-E,19294
|
|
28
|
-
shrinkray-25.12.27.
|
|
29
|
-
shrinkray-25.12.27.
|
|
30
|
-
shrinkray-25.12.27.
|
|
31
|
-
shrinkray-25.12.27.
|
|
32
|
-
shrinkray-25.12.27.
|
|
33
|
-
shrinkray-25.12.27.
|
|
28
|
+
shrinkray-25.12.27.2.dist-info/licenses/LICENSE,sha256=iMKX79AuokJfIZUnGUARdUp30vVAoIPOJ7ek8TY63kk,1072
|
|
29
|
+
shrinkray-25.12.27.2.dist-info/METADATA,sha256=-rrEVZeXrERITn0adTTOOi6Ui7fK6Dz-GvyFEQcHvtw,7600
|
|
30
|
+
shrinkray-25.12.27.2.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
31
|
+
shrinkray-25.12.27.2.dist-info/entry_points.txt,sha256=wIZvnGyOdVeaLTiv2klnSyTe-EKkkwn4SwHh9bmJ7qk,104
|
|
32
|
+
shrinkray-25.12.27.2.dist-info/top_level.txt,sha256=fLif8-rFoFOnf5h8-vs3ECkKNWQopTQh3xvl1s7pchQ,10
|
|
33
|
+
shrinkray-25.12.27.2.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|