shrinkray 25.12.27.1__py3-none-any.whl → 25.12.27.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- shrinkray/__main__.py +25 -11
- shrinkray/passes/bytes.py +8 -7
- shrinkray/passes/definitions.py +3 -67
- shrinkray/passes/genericlanguages.py +14 -10
- shrinkray/passes/json.py +2 -2
- shrinkray/passes/sat.py +2 -7
- shrinkray/problem.py +257 -11
- shrinkray/reducer.py +9 -2
- shrinkray/state.py +199 -67
- shrinkray/subprocess/client.py +2 -0
- shrinkray/subprocess/protocol.py +8 -0
- shrinkray/subprocess/worker.py +67 -17
- shrinkray/tui.py +114 -92
- shrinkray/validation.py +403 -0
- {shrinkray-25.12.27.1.dist-info → shrinkray-25.12.27.3.dist-info}/METADATA +1 -28
- shrinkray-25.12.27.3.dist-info/RECORD +34 -0
- shrinkray-25.12.27.1.dist-info/RECORD +0 -33
- {shrinkray-25.12.27.1.dist-info → shrinkray-25.12.27.3.dist-info}/WHEEL +0 -0
- {shrinkray-25.12.27.1.dist-info → shrinkray-25.12.27.3.dist-info}/entry_points.txt +0 -0
- {shrinkray-25.12.27.1.dist-info → shrinkray-25.12.27.3.dist-info}/licenses/LICENSE +0 -0
- {shrinkray-25.12.27.1.dist-info → shrinkray-25.12.27.3.dist-info}/top_level.txt +0 -0
shrinkray/__main__.py
CHANGED
|
@@ -17,18 +17,19 @@ from shrinkray.cli import (
|
|
|
17
17
|
validate_command,
|
|
18
18
|
validate_ui,
|
|
19
19
|
)
|
|
20
|
+
from shrinkray.formatting import determine_formatter_command
|
|
20
21
|
from shrinkray.passes.clangdelta import (
|
|
21
22
|
C_FILE_EXTENSIONS,
|
|
22
23
|
ClangDelta,
|
|
23
24
|
find_clang_delta,
|
|
24
25
|
)
|
|
25
|
-
from shrinkray.problem import InvalidInitialExample
|
|
26
26
|
from shrinkray.state import (
|
|
27
27
|
ShrinkRayDirectoryState,
|
|
28
28
|
ShrinkRayState,
|
|
29
29
|
ShrinkRayStateSingleFile,
|
|
30
30
|
)
|
|
31
31
|
from shrinkray.ui import BasicUI, ShrinkRayUI
|
|
32
|
+
from shrinkray.validation import run_validation
|
|
32
33
|
from shrinkray.work import Volume
|
|
33
34
|
|
|
34
35
|
|
|
@@ -39,12 +40,9 @@ async def run_shrink_ray(
|
|
|
39
40
|
"""Run the shrink ray reduction process."""
|
|
40
41
|
async with trio.open_nursery() as nursery:
|
|
41
42
|
problem = state.problem
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
assert len(excs.exceptions) == 1
|
|
46
|
-
(e,) = excs.exceptions
|
|
47
|
-
await state.report_error(e)
|
|
43
|
+
# Validation runs before run_shrink_ray is called, so setup() should
|
|
44
|
+
# always succeed. If it doesn't, there's a bug and we want it to propagate.
|
|
45
|
+
await problem.setup()
|
|
48
46
|
|
|
49
47
|
reducer = state.reducer
|
|
50
48
|
|
|
@@ -273,6 +271,26 @@ def main(
|
|
|
273
271
|
if not backup:
|
|
274
272
|
backup = filename + os.extsep + "bak"
|
|
275
273
|
|
|
274
|
+
# Run initial validation before any state setup
|
|
275
|
+
# This validates the interestingness test and formatter with proper output streaming
|
|
276
|
+
formatter_command = None
|
|
277
|
+
if not os.path.isdir(filename) and formatter.lower() != "none":
|
|
278
|
+
formatter_command = determine_formatter_command(formatter, filename)
|
|
279
|
+
|
|
280
|
+
validation_result = run_validation(
|
|
281
|
+
file_path=filename,
|
|
282
|
+
test=test,
|
|
283
|
+
input_type=input_type,
|
|
284
|
+
in_place=in_place,
|
|
285
|
+
formatter_command=formatter_command,
|
|
286
|
+
)
|
|
287
|
+
|
|
288
|
+
if not validation_result.success:
|
|
289
|
+
print(f"\nError: {validation_result.error_message}", file=sys.stderr)
|
|
290
|
+
sys.exit(1)
|
|
291
|
+
|
|
292
|
+
print("\nStarting reduction...", file=sys.stderr, flush=True)
|
|
293
|
+
|
|
276
294
|
state_kwargs: dict[str, Any] = {
|
|
277
295
|
"input_type": input_type,
|
|
278
296
|
"in_place": in_place,
|
|
@@ -307,8 +325,6 @@ def main(
|
|
|
307
325
|
|
|
308
326
|
state = ShrinkRayDirectoryState(initial=initial, **state_kwargs)
|
|
309
327
|
|
|
310
|
-
trio.run(state.check_formatter)
|
|
311
|
-
|
|
312
328
|
else:
|
|
313
329
|
try:
|
|
314
330
|
os.remove(backup)
|
|
@@ -323,8 +339,6 @@ def main(
|
|
|
323
339
|
|
|
324
340
|
state = ShrinkRayStateSingleFile(initial=initial, **state_kwargs)
|
|
325
341
|
|
|
326
|
-
trio.run(state.check_formatter)
|
|
327
|
-
|
|
328
342
|
if ui_type == UIType.textual:
|
|
329
343
|
from shrinkray.tui import run_textual_ui
|
|
330
344
|
|
shrinkray/passes/bytes.py
CHANGED
|
@@ -24,8 +24,8 @@ from collections.abc import Sequence
|
|
|
24
24
|
|
|
25
25
|
from attrs import define
|
|
26
26
|
|
|
27
|
-
from shrinkray.passes.definitions import Format, ReductionProblem
|
|
28
27
|
from shrinkray.passes.patching import Cuts, Patches, apply_patches
|
|
28
|
+
from shrinkray.problem import Format, ReductionProblem
|
|
29
29
|
|
|
30
30
|
|
|
31
31
|
@define(frozen=True)
|
|
@@ -739,12 +739,13 @@ async def line_sorter(problem: ReductionProblem[bytes]):
|
|
|
739
739
|
while i < len(lines):
|
|
740
740
|
j = i
|
|
741
741
|
while j > 0:
|
|
742
|
-
|
|
743
|
-
|
|
744
|
-
|
|
745
|
-
|
|
746
|
-
|
|
747
|
-
|
|
742
|
+
attempt = list(lines)
|
|
743
|
+
attempt[j - 1], attempt[j] = attempt[j], attempt[j - 1]
|
|
744
|
+
new_test_case = b"\n".join(attempt)
|
|
745
|
+
if problem.sort_key(new_test_case) < problem.sort_key(
|
|
746
|
+
problem.current_test_case
|
|
747
|
+
):
|
|
748
|
+
if not await problem.is_interesting(new_test_case):
|
|
748
749
|
break
|
|
749
750
|
else:
|
|
750
751
|
j -= 1
|
shrinkray/passes/definitions.py
CHANGED
|
@@ -4,20 +4,20 @@ This module defines the core type aliases and abstractions for reduction:
|
|
|
4
4
|
|
|
5
5
|
- ReductionPass[T]: A function that attempts to reduce a test case
|
|
6
6
|
- ReductionPump[T]: A function that may temporarily increase test case size
|
|
7
|
-
- Format[S, T]: A bidirectional transformation between types
|
|
8
7
|
- compose(): Combines a Format with a pass to work on a different type
|
|
9
8
|
|
|
10
9
|
These abstractions enable format-agnostic reduction: the same pass
|
|
11
10
|
(e.g., "delete duplicate elements") can work on bytes, lines, tokens,
|
|
12
11
|
JSON arrays, or any other sequence-like type.
|
|
12
|
+
|
|
13
|
+
Note: Format, ParseError, and DumpError are defined in shrinkray.problem.
|
|
13
14
|
"""
|
|
14
15
|
|
|
15
|
-
from abc import ABC, abstractmethod
|
|
16
16
|
from collections.abc import Awaitable, Callable
|
|
17
17
|
from functools import wraps
|
|
18
18
|
from typing import TypeVar
|
|
19
19
|
|
|
20
|
-
from shrinkray.problem import ReductionProblem
|
|
20
|
+
from shrinkray.problem import Format, ParseError, ReductionProblem
|
|
21
21
|
|
|
22
22
|
|
|
23
23
|
S = TypeVar("S")
|
|
@@ -36,70 +36,6 @@ ReductionPass = Callable[[ReductionProblem[T]], Awaitable[None]]
|
|
|
36
36
|
ReductionPump = Callable[[ReductionProblem[T]], Awaitable[T]]
|
|
37
37
|
|
|
38
38
|
|
|
39
|
-
class ParseError(Exception):
|
|
40
|
-
"""Raised when a Format cannot parse its input."""
|
|
41
|
-
|
|
42
|
-
pass
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
class DumpError(Exception):
|
|
46
|
-
"""Raised when a Format cannot serialize its output.
|
|
47
|
-
|
|
48
|
-
This occurs because not all internal representations map to valid
|
|
49
|
-
output in the target format. For example, a reduction might create
|
|
50
|
-
an invalid AST structure that cannot be converted back to source code.
|
|
51
|
-
"""
|
|
52
|
-
|
|
53
|
-
pass
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
class Format[S, T](ABC):
|
|
57
|
-
"""A bidirectional transformation between two types.
|
|
58
|
-
|
|
59
|
-
Formats enable format-agnostic passes by abstracting the
|
|
60
|
-
parse/serialize cycle. For example:
|
|
61
|
-
|
|
62
|
-
- Split(b"\\n"): bytes <-> list[bytes] (lines)
|
|
63
|
-
- Tokenize(): bytes <-> list[bytes] (tokens)
|
|
64
|
-
- JSON: bytes <-> Any (Python objects)
|
|
65
|
-
- DimacsCNF: bytes <-> list[list[int]] (SAT clauses)
|
|
66
|
-
|
|
67
|
-
A Format must satisfy the round-trip property:
|
|
68
|
-
dumps(parse(x)) should be equivalent to x
|
|
69
|
-
(possibly with normalization)
|
|
70
|
-
|
|
71
|
-
Example usage:
|
|
72
|
-
# Delete duplicate lines
|
|
73
|
-
compose(Split(b"\\n"), delete_duplicates)
|
|
74
|
-
|
|
75
|
-
# Reduce integer literals in source code
|
|
76
|
-
compose(IntegerFormat(), reduce_integer)
|
|
77
|
-
"""
|
|
78
|
-
|
|
79
|
-
@property
|
|
80
|
-
def name(self) -> str:
|
|
81
|
-
"""Human-readable name for this format, used in pass names."""
|
|
82
|
-
return repr(self)
|
|
83
|
-
|
|
84
|
-
@abstractmethod
|
|
85
|
-
def parse(self, input: S) -> T:
|
|
86
|
-
"""Parse input into the target type. Raises ParseError on failure."""
|
|
87
|
-
...
|
|
88
|
-
|
|
89
|
-
def is_valid(self, input: S) -> bool:
|
|
90
|
-
"""Check if input can be parsed by this format."""
|
|
91
|
-
try:
|
|
92
|
-
self.parse(input)
|
|
93
|
-
return True
|
|
94
|
-
except ParseError:
|
|
95
|
-
return False
|
|
96
|
-
|
|
97
|
-
@abstractmethod
|
|
98
|
-
def dumps(self, input: T) -> S:
|
|
99
|
-
"""Serialize the target type back to the source type."""
|
|
100
|
-
...
|
|
101
|
-
|
|
102
|
-
|
|
103
39
|
def compose(format: Format[S, T], reduction_pass: ReductionPass[T]) -> ReductionPass[S]:
|
|
104
40
|
"""Wrap a reduction pass to work through a Format transformation.
|
|
105
41
|
|
shrinkray/passes/genericlanguages.py
CHANGED
|
@@ -3,7 +3,7 @@ Module of reduction passes designed for "things that look like programming langu
|
|
|
3
3
|
"""
|
|
4
4
|
|
|
5
5
|
import re
|
|
6
|
-
from collections.abc import Callable
|
|
6
|
+
from collections.abc import Callable
|
|
7
7
|
from functools import wraps
|
|
8
8
|
from string import ascii_lowercase, ascii_uppercase
|
|
9
9
|
from typing import AnyStr
|
|
@@ -12,9 +12,15 @@ import trio
|
|
|
12
12
|
from attr import define
|
|
13
13
|
|
|
14
14
|
from shrinkray.passes.bytes import ByteReplacement, delete_intervals
|
|
15
|
-
from shrinkray.passes.definitions import
|
|
15
|
+
from shrinkray.passes.definitions import ReductionPass
|
|
16
16
|
from shrinkray.passes.patching import PatchApplier, Patches, apply_patches
|
|
17
|
-
from shrinkray.problem import
|
|
17
|
+
from shrinkray.problem import (
|
|
18
|
+
BasicReductionProblem,
|
|
19
|
+
Format,
|
|
20
|
+
ParseError,
|
|
21
|
+
ReductionProblem,
|
|
22
|
+
sort_key_for_initial,
|
|
23
|
+
)
|
|
18
24
|
from shrinkray.work import NotFound
|
|
19
25
|
|
|
20
26
|
|
|
@@ -240,10 +246,6 @@ async def simplify_brackets(problem: ReductionProblem[bytes]) -> None:
|
|
|
240
246
|
IDENTIFIER = re.compile(rb"(\b[A-Za-z][A-Za-z0-9_]*\b)|([0-9]+)")
|
|
241
247
|
|
|
242
248
|
|
|
243
|
-
def shortlex[T: Sized](s: T) -> tuple[int, T]:
|
|
244
|
-
return (len(s), s)
|
|
245
|
-
|
|
246
|
-
|
|
247
249
|
async def normalize_identifiers(problem: ReductionProblem[bytes]) -> None:
|
|
248
250
|
"""Replace identifiers with shorter alternatives.
|
|
249
251
|
|
|
@@ -261,8 +263,10 @@ async def normalize_identifiers(problem: ReductionProblem[bytes]) -> None:
|
|
|
261
263
|
replacements.add(c)
|
|
262
264
|
break
|
|
263
265
|
|
|
264
|
-
|
|
265
|
-
|
|
266
|
+
sort_key = sort_key_for_initial(problem.current_test_case)
|
|
267
|
+
|
|
268
|
+
replacements = sorted(replacements, key=sort_key)
|
|
269
|
+
targets = sorted(identifiers, key=sort_key, reverse=True)
|
|
266
270
|
|
|
267
271
|
# TODO: This could use better parallelisation.
|
|
268
272
|
for t in targets:
|
|
@@ -272,7 +276,7 @@ async def normalize_identifiers(problem: ReductionProblem[bytes]) -> None:
|
|
|
272
276
|
continue
|
|
273
277
|
|
|
274
278
|
async def can_replace(r):
|
|
275
|
-
if
|
|
279
|
+
if sort_key(r) >= sort_key(t):
|
|
276
280
|
return False
|
|
277
281
|
attempt = pattern.sub(r, source)
|
|
278
282
|
assert attempt != source
|
shrinkray/passes/json.py
CHANGED
|
@@ -4,9 +4,9 @@ from typing import Any
|
|
|
4
4
|
|
|
5
5
|
from attrs import define
|
|
6
6
|
|
|
7
|
-
from shrinkray.passes.definitions import
|
|
7
|
+
from shrinkray.passes.definitions import ReductionPass
|
|
8
8
|
from shrinkray.passes.patching import Patches, apply_patches
|
|
9
|
-
from shrinkray.problem import ReductionProblem
|
|
9
|
+
from shrinkray.problem import Format, ParseError, ReductionProblem
|
|
10
10
|
|
|
11
11
|
|
|
12
12
|
def is_json(s: bytes) -> bool:
|
shrinkray/passes/sat.py
CHANGED
|
@@ -1,15 +1,10 @@
|
|
|
1
1
|
from collections import Counter, defaultdict
|
|
2
2
|
from collections.abc import Callable, Iterable, Iterator
|
|
3
3
|
|
|
4
|
-
from shrinkray.passes.definitions import (
|
|
5
|
-
DumpError,
|
|
6
|
-
Format,
|
|
7
|
-
ParseError,
|
|
8
|
-
ReductionPass,
|
|
9
|
-
)
|
|
4
|
+
from shrinkray.passes.definitions import ReductionPass
|
|
10
5
|
from shrinkray.passes.patching import Conflict, SetPatches, apply_patches
|
|
11
6
|
from shrinkray.passes.sequences import delete_elements
|
|
12
|
-
from shrinkray.problem import ReductionProblem
|
|
7
|
+
from shrinkray.problem import DumpError, Format, ParseError, ReductionProblem
|
|
13
8
|
|
|
14
9
|
|
|
15
10
|
Clause = list[int]
|
shrinkray/problem.py
CHANGED
|
@@ -12,12 +12,13 @@ the details of caching, parallelism, and state management.
|
|
|
12
12
|
"""
|
|
13
13
|
|
|
14
14
|
import hashlib
|
|
15
|
+
import string
|
|
15
16
|
import time
|
|
16
17
|
from abc import ABC, abstractmethod
|
|
17
18
|
from collections.abc import Awaitable, Callable, Sized
|
|
18
19
|
from datetime import timedelta
|
|
20
|
+
from functools import total_ordering
|
|
19
21
|
from typing import (
|
|
20
|
-
TYPE_CHECKING,
|
|
21
22
|
Any,
|
|
22
23
|
Protocol,
|
|
23
24
|
TypeVar,
|
|
@@ -29,12 +30,10 @@ import trio
|
|
|
29
30
|
from attrs import define
|
|
30
31
|
from humanize import naturalsize, precisedelta
|
|
31
32
|
|
|
33
|
+
from shrinkray.formatting import try_decode
|
|
32
34
|
from shrinkray.work import WorkContext
|
|
33
35
|
|
|
34
36
|
|
|
35
|
-
if TYPE_CHECKING:
|
|
36
|
-
from shrinkray.passes.definitions import Format
|
|
37
|
-
|
|
38
37
|
S = TypeVar("S")
|
|
39
38
|
T = TypeVar("T")
|
|
40
39
|
|
|
@@ -71,9 +70,196 @@ def shortlex[SizedT: Sized](value: SizedT) -> tuple[int, SizedT]:
|
|
|
71
70
|
return (len(value), value)
|
|
72
71
|
|
|
73
72
|
|
|
74
|
-
|
|
75
|
-
|
|
73
|
+
@total_ordering
|
|
74
|
+
class LazyChainedSortKey:
|
|
75
|
+
"""A comparison key that lazily evaluates a chain of comparison functions.
|
|
76
|
+
|
|
77
|
+
This class provides an ordering that compares values by applying a sequence
|
|
78
|
+
of functions in order. The first function that produces different values
|
|
79
|
+
for two inputs determines the ordering. If all functions return equal
|
|
80
|
+
values, the inputs are considered equal.
|
|
81
|
+
|
|
82
|
+
This is used to implement the natural ordering for strings, which compares
|
|
83
|
+
by length, then average squared line length, then number of lines, etc.
|
|
84
|
+
|
|
85
|
+
The "lazy" aspect is that comparison functions are only evaluated until
|
|
86
|
+
one returns different values, avoiding unnecessary computation.
|
|
87
|
+
"""
|
|
88
|
+
|
|
89
|
+
def __init__(self, functions: list[Callable[[T], Any]], value: T):
|
|
90
|
+
self.functions = functions
|
|
91
|
+
self.value = value
|
|
92
|
+
|
|
93
|
+
def __eq__(self, other):
|
|
94
|
+
if not isinstance(other, LazyChainedSortKey):
|
|
95
|
+
return NotImplemented
|
|
96
|
+
assert len(self.functions) == len(other.functions)
|
|
97
|
+
return self.value == other.value
|
|
98
|
+
|
|
99
|
+
def __lt__(self, other):
|
|
100
|
+
if self == other:
|
|
101
|
+
return False
|
|
102
|
+
if not isinstance(other, LazyChainedSortKey):
|
|
103
|
+
return NotImplemented
|
|
104
|
+
for f in self.functions:
|
|
105
|
+
self_key = f(self.value)
|
|
106
|
+
other_key = f(other.value)
|
|
107
|
+
if self_key < other_key:
|
|
108
|
+
return True
|
|
109
|
+
elif self_key > other_key:
|
|
110
|
+
return False
|
|
111
|
+
# All comparison functions returned equal values for different inputs.
|
|
112
|
+
# This shouldn't happen with the current functions (natural_string_lex
|
|
113
|
+
# compares character-by-character) but if it does, neither is less.
|
|
114
|
+
return False
|
|
115
|
+
|
|
116
|
+
|
|
117
|
+
# Natural character ordering: whitespace < digits < lowercase < uppercase.
|
|
118
|
+
# Characters not in this string are sorted by ord() after all known characters.
|
|
119
|
+
NATURAL_CHARACTER_ORDER = (
|
|
120
|
+
string.whitespace + string.digits + string.ascii_lowercase + string.ascii_uppercase
|
|
121
|
+
)
|
|
122
|
+
NATURAL_CHARACTER_ORDER_INDEX = {s: i for i, s in enumerate(NATURAL_CHARACTER_ORDER)}
|
|
123
|
+
|
|
124
|
+
|
|
125
|
+
def character_index(c: str) -> int:
|
|
126
|
+
"""Return the sorting index for a character in natural ordering.
|
|
127
|
+
|
|
128
|
+
Characters in NATURAL_CHARACTER_ORDER get their position in that string.
|
|
129
|
+
Unknown characters (punctuation, unicode, etc.) sort after all known
|
|
130
|
+
characters, ordered by their Unicode code point.
|
|
131
|
+
"""
|
|
132
|
+
return NATURAL_CHARACTER_ORDER_INDEX.get(c, len(NATURAL_CHARACTER_ORDER) + ord(c))
|
|
133
|
+
|
|
134
|
+
|
|
135
|
+
def natural_string_lex(s: str) -> list[int]:
|
|
136
|
+
"""Convert a string to a list of character indices for lexicographic comparison.
|
|
137
|
+
|
|
138
|
+
This transforms the string so that comparing the resulting lists gives
|
|
139
|
+
the natural character ordering (whitespace < digits < lowercase < uppercase).
|
|
140
|
+
"""
|
|
141
|
+
return list(map(character_index, s))
|
|
142
|
+
|
|
143
|
+
|
|
144
|
+
# The chain of comparison functions used for natural string ordering.
|
|
145
|
+
# Each function is tried in sequence; the first that differs determines order.
|
|
146
|
+
#
|
|
147
|
+
# 1. Total length - shorter strings are always preferred
|
|
148
|
+
# 2. Average squared line length - penalizes very long lines, preferring balanced code
|
|
149
|
+
# Formula: sum(len(line)²) / count(lines)²
|
|
150
|
+
# 3. Number of lines - fewer lines is better (after accounting for balance)
|
|
151
|
+
# 4. List of line lengths - lexicographically compare line length sequences
|
|
152
|
+
# 5. Natural character order - whitespace < digits < lowercase < uppercase
|
|
153
|
+
NATURAL_ORDERING_FUNCTIONS: list[Callable[[str], Any]] = [
|
|
154
|
+
len,
|
|
155
|
+
lambda s: sum(len(line) ** 2 for line in s.split("\n")) / len(s.split("\n")) ** 2,
|
|
156
|
+
lambda s: len(s.splitlines()),
|
|
157
|
+
lambda s: list(map(len, s.splitlines())),
|
|
158
|
+
natural_string_lex,
|
|
159
|
+
]
|
|
160
|
+
|
|
161
|
+
|
|
162
|
+
def natural_key(s: str) -> LazyChainedSortKey:
|
|
163
|
+
"""Return a comparison key for natural string ordering.
|
|
164
|
+
|
|
165
|
+
Natural ordering uses a chain of heuristics to determine which string
|
|
166
|
+
is "smaller" (more reduced). This is designed to produce human-readable
|
|
167
|
+
minimal test cases with balanced line lengths and natural character choices.
|
|
168
|
+
|
|
169
|
+
See NATURAL_ORDERING_FUNCTIONS for the complete ordering criteria.
|
|
170
|
+
"""
|
|
171
|
+
return LazyChainedSortKey(functions=NATURAL_ORDERING_FUNCTIONS, value=s)
|
|
172
|
+
|
|
173
|
+
|
|
174
|
+
def sort_key_for_initial(initial: Any) -> Callable[[Any], Any]:
|
|
175
|
+
"""Create a sort key function appropriate for the given initial value.
|
|
176
|
+
|
|
177
|
+
This examines the initial test case and returns a comparison function
|
|
178
|
+
that will be used to order all test cases during reduction.
|
|
179
|
+
|
|
180
|
+
For bytes:
|
|
181
|
+
- If decodable as text, uses natural ordering on the decoded string
|
|
182
|
+
- Falls back to shortlex for binary data that can't be decoded
|
|
183
|
+
|
|
184
|
+
For dicts:
|
|
185
|
+
- Orders by total size of values, then number of keys
|
|
186
|
+
- Then compares values for each key in order of largest-first
|
|
187
|
+
|
|
188
|
+
For other types:
|
|
189
|
+
- Falls back to natural ordering on repr()
|
|
190
|
+
|
|
191
|
+
The returned function can be used as a sort key or comparison key.
|
|
192
|
+
"""
|
|
193
|
+
if isinstance(initial, bytes):
|
|
194
|
+
encoding, _ = try_decode(initial)
|
|
195
|
+
if encoding is None:
|
|
196
|
+
return shortlex
|
|
197
|
+
else:
|
|
198
|
+
|
|
199
|
+
def natural_for_encoding(b: bytes) -> Any:
|
|
200
|
+
try:
|
|
201
|
+
s = b.decode(encoding)
|
|
202
|
+
return (0, natural_key(s))
|
|
203
|
+
except UnicodeDecodeError:
|
|
204
|
+
return (1, shortlex(b))
|
|
205
|
+
|
|
206
|
+
return natural_for_encoding
|
|
207
|
+
elif isinstance(initial, dict):
|
|
208
|
+
keys = sorted(initial, key=lambda k: shortlex(initial[k]), reverse=True)
|
|
209
|
+
natural_keys = {k: sort_key_for_initial(v) for k, v in initial.items()}
|
|
210
|
+
|
|
211
|
+
def dict_total_size(s):
|
|
212
|
+
return sum(len(v) for v in s.values())
|
|
213
|
+
|
|
214
|
+
def key_sort_key(k):
|
|
215
|
+
def f(x):
|
|
216
|
+
try:
|
|
217
|
+
v = x[k]
|
|
218
|
+
except KeyError:
|
|
219
|
+
return (0,)
|
|
220
|
+
else:
|
|
221
|
+
return (1, natural_keys[k](v))
|
|
222
|
+
|
|
223
|
+
return f
|
|
224
|
+
|
|
225
|
+
functions = [
|
|
226
|
+
dict_total_size,
|
|
227
|
+
len,
|
|
228
|
+
] + [key_sort_key(k) for k in keys]
|
|
229
|
+
|
|
230
|
+
def dict_sort_key(v):
|
|
231
|
+
return LazyChainedSortKey(
|
|
232
|
+
functions=functions,
|
|
233
|
+
value=v,
|
|
234
|
+
)
|
|
235
|
+
|
|
236
|
+
return dict_sort_key
|
|
237
|
+
else:
|
|
238
|
+
# We don't use this branch in the main app, but this
|
|
239
|
+
# function is also used in tests.
|
|
240
|
+
def fallback_sort_key(s):
|
|
241
|
+
return natural_key(repr(s))
|
|
242
|
+
|
|
243
|
+
return fallback_sort_key
|
|
244
|
+
|
|
245
|
+
|
|
246
|
+
def default_sort_key(value: Any) -> Any:
|
|
247
|
+
"""Return a comparison key for a value using type-appropriate ordering.
|
|
248
|
+
|
|
249
|
+
This is a simpler alternative to sort_key_for_initial that doesn't
|
|
250
|
+
examine the initial value to determine the best ordering.
|
|
251
|
+
|
|
252
|
+
- bytes: shortlex ordering (length, then lexicographic)
|
|
253
|
+
- str: natural ordering (length, line balance, character order)
|
|
254
|
+
- other: shortlex on repr()
|
|
255
|
+
|
|
256
|
+
Note: This really should return some sort of Comparable type, but Python
|
|
257
|
+
doesn't have a built-in protocol for that.
|
|
258
|
+
"""
|
|
259
|
+
if isinstance(value, bytes):
|
|
76
260
|
return shortlex(value)
|
|
261
|
+
elif isinstance(value, str):
|
|
262
|
+
return natural_key(value)
|
|
77
263
|
else:
|
|
78
264
|
return shortlex(repr(value))
|
|
79
265
|
|
|
@@ -85,6 +271,70 @@ def default_display(value: Any) -> str:
|
|
|
85
271
|
return f"value of size {len(value)}"
|
|
86
272
|
|
|
87
273
|
|
|
274
|
+
class ParseError(Exception):
|
|
275
|
+
"""Raised when a Format cannot parse its input."""
|
|
276
|
+
|
|
277
|
+
pass
|
|
278
|
+
|
|
279
|
+
|
|
280
|
+
class DumpError(Exception):
|
|
281
|
+
"""Raised when a Format cannot serialize its output.
|
|
282
|
+
|
|
283
|
+
This occurs because not all internal representations map to valid
|
|
284
|
+
output in the target format. For example, a reduction might create
|
|
285
|
+
an invalid AST structure that cannot be converted back to source code.
|
|
286
|
+
"""
|
|
287
|
+
|
|
288
|
+
pass
|
|
289
|
+
|
|
290
|
+
|
|
291
|
+
class Format[S, T](ABC):
|
|
292
|
+
"""A bidirectional transformation between two types.
|
|
293
|
+
|
|
294
|
+
Formats enable format-agnostic passes by abstracting the
|
|
295
|
+
parse/serialize cycle. For example:
|
|
296
|
+
|
|
297
|
+
- Split(b"\\n"): bytes <-> list[bytes] (lines)
|
|
298
|
+
- Tokenize(): bytes <-> list[bytes] (tokens)
|
|
299
|
+
- JSON: bytes <-> Any (Python objects)
|
|
300
|
+
- DimacsCNF: bytes <-> list[list[int]] (SAT clauses)
|
|
301
|
+
|
|
302
|
+
A Format must satisfy the round-trip property:
|
|
303
|
+
dumps(parse(x)) should be equivalent to x
|
|
304
|
+
(possibly with normalization)
|
|
305
|
+
|
|
306
|
+
Example usage:
|
|
307
|
+
# Delete duplicate lines
|
|
308
|
+
compose(Split(b"\\n"), delete_duplicates)
|
|
309
|
+
|
|
310
|
+
# Reduce integer literals in source code
|
|
311
|
+
compose(IntegerFormat(), reduce_integer)
|
|
312
|
+
"""
|
|
313
|
+
|
|
314
|
+
@property
|
|
315
|
+
def name(self) -> str:
|
|
316
|
+
"""Human-readable name for this format, used in pass names."""
|
|
317
|
+
return repr(self)
|
|
318
|
+
|
|
319
|
+
@abstractmethod
|
|
320
|
+
def parse(self, input: S) -> T:
|
|
321
|
+
"""Parse input into the target type. Raises ParseError on failure."""
|
|
322
|
+
...
|
|
323
|
+
|
|
324
|
+
def is_valid(self, input: S) -> bool:
|
|
325
|
+
"""Check if input can be parsed by this format."""
|
|
326
|
+
try:
|
|
327
|
+
self.parse(input)
|
|
328
|
+
return True
|
|
329
|
+
except ParseError:
|
|
330
|
+
return False
|
|
331
|
+
|
|
332
|
+
@abstractmethod
|
|
333
|
+
def dumps(self, input: T) -> S:
|
|
334
|
+
"""Serialize the target type back to the source type."""
|
|
335
|
+
...
|
|
336
|
+
|
|
337
|
+
|
|
88
338
|
def default_size(value: Any) -> int:
|
|
89
339
|
try:
|
|
90
340
|
return len(value)
|
|
@@ -182,9 +432,7 @@ class ReductionProblem[T](ABC):
|
|
|
182
432
|
# Cache of View objects for each Format, to avoid re-parsing
|
|
183
433
|
self.__view_cache: dict[Any, ReductionProblem[Any]] = {}
|
|
184
434
|
|
|
185
|
-
def view(
|
|
186
|
-
self, format: "Format[T, S] | type[Format[T, S]]"
|
|
187
|
-
) -> "ReductionProblem[S]":
|
|
435
|
+
def view(self, format: Format[T, S] | type[Format[T, S]]) -> "ReductionProblem[S]":
|
|
188
436
|
"""Create a view of this problem through a Format.
|
|
189
437
|
|
|
190
438
|
A View wraps this problem, parsing the current test case through
|
|
@@ -481,8 +729,6 @@ class View[S, T](ReductionProblem[T]):
|
|
|
481
729
|
return self.__current
|
|
482
730
|
|
|
483
731
|
async def is_interesting(self, test_case: T) -> bool:
|
|
484
|
-
from shrinkray.passes.definitions import DumpError
|
|
485
|
-
|
|
486
732
|
try:
|
|
487
733
|
return await self.__problem.is_interesting(self.__dump(test_case))
|
|
488
734
|
except DumpError:
|
shrinkray/reducer.py
CHANGED
|
@@ -49,7 +49,12 @@ from shrinkray.passes.patching import PatchApplier, Patches
|
|
|
49
49
|
from shrinkray.passes.python import PYTHON_PASSES, is_python
|
|
50
50
|
from shrinkray.passes.sat import SAT_PASSES, DimacsCNF
|
|
51
51
|
from shrinkray.passes.sequences import block_deletion, delete_duplicates
|
|
52
|
-
from shrinkray.problem import
|
|
52
|
+
from shrinkray.problem import (
|
|
53
|
+
ReductionProblem,
|
|
54
|
+
ReductionStats,
|
|
55
|
+
shortlex,
|
|
56
|
+
sort_key_for_initial,
|
|
57
|
+
)
|
|
53
58
|
|
|
54
59
|
|
|
55
60
|
@define
|
|
@@ -531,6 +536,8 @@ class KeyProblem(ReductionProblem[bytes]):
|
|
|
531
536
|
self.base_problem = base_problem
|
|
532
537
|
self.applier = applier
|
|
533
538
|
self.key = key
|
|
539
|
+
# Use the appropriate sort key for this value (natural for text, shortlex for binary)
|
|
540
|
+
self._sort_key_fn = sort_key_for_initial(self.current_test_case)
|
|
534
541
|
|
|
535
542
|
@property
|
|
536
543
|
def current_test_case(self) -> bytes:
|
|
@@ -547,7 +554,7 @@ class KeyProblem(ReductionProblem[bytes]):
|
|
|
547
554
|
return len(test_case)
|
|
548
555
|
|
|
549
556
|
def sort_key(self, test_case: bytes) -> Any:
|
|
550
|
-
return
|
|
557
|
+
return self._sort_key_fn(test_case)
|
|
551
558
|
|
|
552
559
|
def display(self, value: bytes) -> str:
|
|
553
560
|
return repr(value)
|