shrinkray 0.0.0__py3-none-any.whl → 25.12.26.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,14 +1,18 @@
1
1
  from collections import defaultdict
2
- from typing import Any, Sequence, TypeVar
2
+ from collections.abc import Sequence
3
+ from typing import Any
3
4
 
4
5
  from shrinkray.passes.definitions import ReductionPass
5
6
  from shrinkray.passes.patching import CutPatch, Cuts, apply_patches
6
7
  from shrinkray.problem import ReductionProblem
7
8
 
8
- Seq = TypeVar("Seq", bound=Sequence[Any])
9
9
 
10
+ async def delete_elements[Seq: Sequence[Any]](problem: ReductionProblem[Seq]) -> None:
11
+ """Try to delete individual elements from the sequence.
10
12
 
11
- async def delete_elements(problem: ReductionProblem[Seq]) -> None:
13
+ Creates a patch for each element and uses the patch applier to find
14
+ which elements can be removed while maintaining interestingness.
15
+ """
12
16
  await apply_patches(
13
17
  problem, Cuts(), [[(i, i + 1)] for i in range(len(problem.current_test_case))]
14
18
  )
@@ -24,7 +28,9 @@ def merged_intervals(intervals: list[tuple[int, int]]) -> list[tuple[int, int]]:
24
28
  return list(map(tuple, normalized)) # type: ignore
25
29
 
26
30
 
27
- def with_deletions(target: Seq, deletions: list[tuple[int, int]]) -> Seq:
31
+ def with_deletions[Seq: Sequence[Any]](
32
+ target: Seq, deletions: list[tuple[int, int]]
33
+ ) -> Seq:
28
34
  result: list[Any] = []
29
35
  prev = 0
30
36
  total_deleted = 0
@@ -37,7 +43,15 @@ def with_deletions(target: Seq, deletions: list[tuple[int, int]]) -> Seq:
37
43
  return type(target)(result) # type: ignore
38
44
 
39
45
 
40
- def block_deletion(min_block: int, max_block: int) -> ReductionPass[Seq]:
46
+ def block_deletion[Seq: Sequence[Any]](
47
+ min_block: int, max_block: int
48
+ ) -> ReductionPass[Seq]:
49
+ """Create a pass that deletes contiguous blocks of elements.
50
+
51
+ Tries to remove blocks of size min_block to max_block, starting at
52
+ various offsets. Useful for removing larger chunks efficiently.
53
+ """
54
+
41
55
  async def apply(problem: ReductionProblem[Seq]) -> None:
42
56
  n = len(problem.current_test_case)
43
57
  if n <= min_block:
@@ -55,7 +69,13 @@ def block_deletion(min_block: int, max_block: int) -> ReductionPass[Seq]:
55
69
  return apply
56
70
 
57
71
 
58
- async def delete_duplicates(problem: ReductionProblem[Seq]) -> None:
72
+ async def delete_duplicates[Seq: Sequence[Any]](problem: ReductionProblem[Seq]) -> None:
73
+ """Try to delete duplicate elements from the sequence.
74
+
75
+ Groups elements by value and tries to remove all occurrences of each
76
+ duplicated element together. Effective when the test case contains
77
+ repeated patterns that can be eliminated.
78
+ """
59
79
  index: dict[int, list[int]] = defaultdict(list)
60
80
 
61
81
  for i, c in enumerate(problem.current_test_case):
shrinkray/problem.py CHANGED
@@ -1,8 +1,28 @@
1
+ """Core abstractions for test-case reduction.
2
+
3
+ This module defines the fundamental interfaces for reduction problems:
4
+
5
+ - ReductionProblem[T]: The central abstraction representing a reduction task
6
+ - BasicReductionProblem[T]: A concrete implementation with caching and callbacks
7
+ - View[S, T]: A problem wrapper that parses through a Format
8
+
9
+ The key insight is that all reduction is about finding the smallest test case
10
+ that satisfies an "interestingness" predicate. The problem abstraction hides
11
+ the details of caching, parallelism, and state management.
12
+ """
13
+
1
14
  import hashlib
2
15
  import time
3
- from abc import ABC, abstractmethod, abstractproperty
16
+ from abc import ABC, abstractmethod
17
+ from collections.abc import Awaitable, Callable, Sized
4
18
  from datetime import timedelta
5
- from typing import Any, Awaitable, Callable, Generic, Optional, TypeVar, cast
19
+ from typing import (
20
+ TYPE_CHECKING,
21
+ Any,
22
+ Protocol,
23
+ TypeVar,
24
+ cast,
25
+ )
6
26
 
7
27
  import attrs
8
28
  import trio
@@ -11,16 +31,48 @@ from humanize import naturalsize, precisedelta
11
31
 
12
32
  from shrinkray.work import WorkContext
13
33
 
34
+
35
+ if TYPE_CHECKING:
36
+ from shrinkray.passes.definitions import Format
37
+
14
38
  S = TypeVar("S")
15
39
  T = TypeVar("T")
16
40
 
17
41
 
18
- def shortlex(value: Any) -> Any:
42
class PassStatsProtocol(Protocol):
    """Structural type for the per-pass counters a problem updates.

    Declaring the shape here lets problem.py record pass statistics without
    importing from reducer.py, which would create a circular dependency.
    """

    # Number of candidate evaluations attributed to the pass.
    test_evaluations: int
    # Number of evaluations that produced a smaller current test case.
    successful_reductions: int
    # Accumulated size difference (old size minus new size) of reductions.
    bytes_deleted: int
52
+
53
+
54
+ def shortlex[SizedT: Sized](value: SizedT) -> tuple[int, SizedT]:
55
+ """Return a comparison key for shortlex ordering.
56
+
57
+ Shortlex ordering compares first by length, then lexicographically.
58
+ This ensures shorter test cases are always preferred, and among
59
+ equal-length test cases, lexicographically smaller ones win.
60
+
61
+ This ordering is crucial for reproducibility: regardless of which
62
+ reduction path is taken, the final result should be the same minimal
63
+ test case.
64
+
65
+ Example:
66
+ >>> shortlex(b"aa") < shortlex(b"aaa") # shorter wins
67
+ True
68
+ >>> shortlex(b"ab") < shortlex(b"ba") # same length, lex order
69
+ True
70
+ """
19
71
  return (len(value), value)
20
72
 
21
73
 
22
74
def default_sort_key(value: Any):
    """Return the shortlex key used to order test cases by default.

    ``str`` and ``bytes`` values are keyed directly; any other value is
    keyed by its ``repr()`` string.
    """
    keyed = value if isinstance(value, str | bytes) else repr(value)
    return shortlex(keyed)
@@ -99,15 +151,57 @@ class ReductionStats:
99
151
 
100
152
 
101
153
  @define(slots=False)
102
- class ReductionProblem(Generic[T], ABC):
154
+ class ReductionProblem[T](ABC):
155
+ """Abstract base class representing a test-case reduction task.
156
+
157
+ A ReductionProblem encapsulates everything needed to reduce a test case:
158
+ - The current best-known interesting test case
159
+ - A predicate to test if candidates are "interesting" (trigger the bug)
160
+ - An ordering to determine which test cases are "smaller"
161
+
162
+ Reduction passes work by generating candidate test cases and calling
163
+ is_interesting() on them. When a smaller interesting test case is found,
164
+ current_test_case is automatically updated.
165
+
166
+ The problem maintains a cache of interestingness results and tracks
167
+ statistics about the reduction process.
168
+
169
+ Subclasses must implement:
170
+ - current_test_case: Property returning the current best test case
171
+ - is_interesting(test_case): Async method testing if a candidate works
172
+ - sort_key(test_case): Returns a comparable key for ordering
173
+ - size(test_case): Returns the size of a test case
174
+ - display(value): Returns a human-readable representation
175
+ """
176
+
103
177
  work: WorkContext
178
+ # Track current pass stats for real-time updates (set by reducer)
179
+ current_pass_stats: PassStatsProtocol | None = None
104
180
 
105
181
  def __attrs_post_init__(self) -> None:
182
+ # Cache of View objects for each Format, to avoid re-parsing
106
183
  self.__view_cache: dict[Any, ReductionProblem[Any]] = {}
107
184
 
108
185
  def view(
109
186
  self, format: "Format[T, S] | type[Format[T, S]]"
110
187
  ) -> "ReductionProblem[S]":
188
+ """Create a view of this problem through a Format.
189
+
190
+ A View wraps this problem, parsing the current test case through
191
+ the format's parse() method and serializing candidates back through
192
+ dumps(). This allows format-specific passes to work on structured
193
+ data while the underlying problem operates on bytes.
194
+
195
+ Example:
196
+ # Work on lines instead of raw bytes
197
+ line_problem = byte_problem.view(Split(b"\\n"))
198
+
199
+ # Work on JSON structure
200
+ json_problem = byte_problem.view(JSON)
201
+
202
+ Views are cached: calling view() with the same format returns the
203
+ same View object, avoiding redundant parsing.
204
+ """
111
205
  try:
112
206
  return cast(ReductionProblem[S], self.__view_cache[format])
113
207
  except KeyError:
@@ -124,17 +218,32 @@ class ReductionProblem(Generic[T], ABC):
124
218
 
125
219
  return cast(ReductionProblem[S], self.__view_cache.setdefault(format, result))
126
220
 
127
- async def setup(self) -> None:
128
- pass
221
+ async def setup(self) -> None: # noqa: B027
222
+ """Initialize the problem before reduction begins.
223
+
224
+ Subclasses may override this to perform validation or initialization.
225
+ The default implementation does nothing.
226
+ """
129
227
 
130
- @abstractproperty
228
+ @property
229
+ @abstractmethod
131
230
  def current_test_case(self) -> T: ...
132
231
 
232
+ @property
233
+ @abstractmethod
234
+ def stats(self) -> ReductionStats: ...
235
+
133
236
  @abstractmethod
134
237
  async def is_interesting(self, test_case: T) -> bool:
135
238
  pass
136
239
 
137
240
  async def is_reduction(self, test_case: T) -> bool:
241
+ """Check if test_case would be a valid reduction from current state.
242
+
243
+ A valid reduction is an interesting test case that is smaller than
244
+ the current one (according to sort_key). This is a convenience method
245
+ that short-circuits if the candidate is larger.
246
+ """
138
247
  if test_case == self.current_test_case:
139
248
  return True
140
249
  if self.sort_key(test_case) > self.sort_key(self.current_test_case):
@@ -156,6 +265,23 @@ class ReductionProblem(Generic[T], ABC):
156
265
  def display(self, value: T) -> str: ...
157
266
 
158
267
  def backtrack(self, new_test_case: T) -> "ReductionProblem[T]":
268
+ """Create a new problem starting from a different test case.
269
+
270
+ This is used by reduction pumps to try larger test cases temporarily.
271
+ The new problem shares the same is_interesting predicate but starts
272
+ from new_test_case instead of current_test_case.
273
+
274
+ If reduction succeeds and the result is smaller than the original
275
+ current_test_case, it can be adopted into the main problem.
276
+
277
+ Example:
278
+ # Pump inlines a function, making code larger
279
+ pumped = await pump(problem) # Returns larger test case
280
+ backtracked = problem.backtrack(pumped)
281
+ # Try to reduce the larger test case
282
+ await run_passes(backtracked)
283
+ # If result is smaller than original, keep it
284
+ """
159
285
  return BasicReductionProblem(
160
286
  initial=new_test_case,
161
287
  is_interesting=self.is_interesting,
@@ -181,6 +307,20 @@ def default_cache_key(value: Any) -> str:
181
307
 
182
308
 
183
309
  class BasicReductionProblem(ReductionProblem[T]):
310
+ """Concrete implementation of ReductionProblem for in-memory reduction.
311
+
312
+ This is the main implementation used by Shrink Ray. It provides:
313
+ - Caching of interestingness results (by content hash)
314
+ - Statistics tracking (calls, cache hits, timing)
315
+ - Callbacks for reduction events
316
+ - Automatic cache clearing when a reduction succeeds
317
+
318
+ The cache clearing is a practical choice: when we find a smaller test case,
319
+ cached results for candidates derived from the old test case are no longer
320
+ useful (we're now reducing from a different starting point). Clearing the
321
+ cache saves memory and avoids serving stale cache entries that won't help.
322
+ """
323
+
184
324
  def __init__(
185
325
  self,
186
326
  initial: T,
@@ -189,7 +329,7 @@ class BasicReductionProblem(ReductionProblem[T]):
189
329
  sort_key: Callable[[T], Any] = default_sort_key,
190
330
  size: Callable[[T], int] = default_size,
191
331
  display: Callable[[T], str] = default_display,
192
- stats: Optional[ReductionStats] = None,
332
+ stats: ReductionStats | None = None,
193
333
  cache_key: Callable[[Any], str] = default_cache_key,
194
334
  ):
195
335
  super().__init__(work=work)
@@ -198,11 +338,11 @@ class BasicReductionProblem(ReductionProblem[T]):
198
338
  self.__size = size
199
339
  self.__display = display
200
340
  if stats is None:
201
- self.stats = ReductionStats()
202
- self.stats.initial_test_case_size = self.size(initial)
203
- self.stats.current_test_case_size = self.size(initial)
341
+ self._stats = ReductionStats()
342
+ self._stats.initial_test_case_size = self.size(initial)
343
+ self._stats.current_test_case_size = self.size(initial)
204
344
  else:
205
- self.stats = stats
345
+ self._stats = stats
206
346
 
207
347
  self.__is_interesting_cache: dict[str, bool] = {}
208
348
  self.__cache_key = cache_key
@@ -223,6 +363,10 @@ class BasicReductionProblem(ReductionProblem[T]):
223
363
  def display(self, value: T) -> str:
224
364
  return self.__display(value)
225
365
 
366
+ @property
367
+ def stats(self) -> ReductionStats:
368
+ return self._stats
369
+
226
370
  def sort_key(self, test_case: T) -> Any:
227
371
  return self.__sort_key(test_case)
228
372
 
@@ -234,31 +378,43 @@ class BasicReductionProblem(ReductionProblem[T]):
234
378
  call `fn` with the new value. Note that these are called outside the lock."""
235
379
  self.__on_reduce_callbacks.append(callback)
236
380
 
237
async def is_interesting(self, test_case: T) -> bool:
    """Report whether ``test_case`` is interesting, adopting it if smaller.

    The call always yields to the trio scheduler first. A value equal to
    the current test case is reported as interesting immediately, and a
    value whose cache key has been seen is answered from the cache; neither
    shortcut touches the statistics.

    Otherwise the wrapped predicate is awaited, its answer cached, and the
    call counters updated. When the answer is truthy and ``test_case``
    sorts strictly below the current test case, the cache is cleared, the
    value becomes the new current test case, and every registered
    on-reduce callback is awaited in registration order.
    """
    await trio.lowlevel.checkpoint()
    if test_case == self.current_test_case:
        return True

    key = self.__cache_key(test_case)
    if key in self.__is_interesting_cache:
        return self.__is_interesting_cache[key]

    outcome = await self.__is_interesting(test_case)
    self.__is_interesting_cache[key] = outcome

    # Counted as a failed reduction up front; undone below if it succeeds.
    self.stats.failed_reductions += 1
    self.stats.calls += 1

    # Update current pass stats if a pass is running
    if self.current_pass_stats is not None:
        self.current_pass_stats.test_evaluations += 1

    if not outcome:
        return outcome

    self.stats.interesting_calls += 1
    if not self.sort_key(test_case) < self.sort_key(self.current_test_case):
        # Interesting, but no improvement on what we already have.
        self.stats.wasted_interesting_calls += 1
        return outcome

    self.__is_interesting_cache.clear()
    self.stats.failed_reductions -= 1
    self.stats.reductions += 1
    self.stats.time_of_last_reduction = time.time()

    # Update current pass stats for reductions
    if self.current_pass_stats is not None:
        self.current_pass_stats.successful_reductions += 1
        shrunk_by = self.size(self.current_test_case) - self.size(test_case)
        self.current_pass_stats.bytes_deleted += shrunk_by

    self.stats.current_test_case_size = self.size(test_case)
    self.__current = test_case
    for callback in self.__on_reduce_callbacks:
        await callback(test_case)
    return outcome
@@ -268,14 +424,32 @@ class BasicReductionProblem(ReductionProblem[T]):
268
424
  return self.__current
269
425
 
270
426
 
271
- class View(ReductionProblem[T], Generic[S, T]):
427
+ class View[S, T](ReductionProblem[T]):
428
+ """A view of a ReductionProblem through a parse/dump transformation.
429
+
430
+ View wraps an underlying problem, presenting it as a different type.
431
+ For example, a problem over bytes can be viewed as a problem over
432
+ lists of lines, or JSON structures, or AST nodes.
433
+
434
+ The View:
435
+ - Parses the underlying problem's test case on access
436
+ - Dumps candidates back to the underlying type for testing
437
+ - Caches the parsed representation for efficiency
438
+ - Delegates interestingness testing to the underlying problem
439
+
440
+ The caching is subtle: when the underlying problem's test case changes,
441
+ the View re-parses it. But it only updates its cached value if the new
442
+ parsed value is "smaller" (according to sort_key), to maintain
443
+ monotonicity of reduction.
444
+ """
445
+
272
446
  def __init__(
273
447
  self,
274
448
  problem: ReductionProblem[S],
275
449
  parse: Callable[[S], T],
276
450
  dump: Callable[[T], S],
277
- work: Optional[WorkContext] = None,
278
- sort_key: Optional[Callable[[T], Any]] = None,
451
+ work: WorkContext | None = None,
452
+ sort_key: Callable[[T], Any] | None = None,
279
453
  ):
280
454
  super().__init__(work=work or problem.work)
281
455
  self.__problem = problem
@@ -292,7 +466,7 @@ class View(ReductionProblem[T], Generic[S, T]):
292
466
 
293
467
  @property
294
468
  def stats(self) -> ReductionStats:
295
- return self.__problem.stats # type: ignore
469
+ return self.__problem.stats
296
470
 
297
471
  @property
298
472
  def current_test_case(self) -> T:
@@ -307,7 +481,12 @@ class View(ReductionProblem[T], Generic[S, T]):
307
481
  return self.__current
308
482
 
309
483
async def is_interesting(self, test_case: T) -> bool:
    """Test ``test_case`` by dumping it and asking the wrapped problem.

    Returns ``False`` when a ``DumpError`` is raised while dumping the
    candidate or while the wrapped problem evaluates it.
    """
    # Imported here rather than at module level (the module only imports
    # from shrinkray.passes.definitions under TYPE_CHECKING).
    from shrinkray.passes.definitions import DumpError

    try:
        dumped = self.__dump(test_case)
        verdict = await self.__problem.is_interesting(dumped)
    except DumpError:
        return False
    return verdict
 
312
491
  def sort_key(self, test_case: T) -> Any:
313
492
  if self.__sort_key is not None:
shrinkray/process.py ADDED
@@ -0,0 +1,49 @@
1
+ """Process management utilities for shrink ray."""
2
+
3
+ import os
4
+ import random
5
+ import signal
6
+
7
+ import trio
8
+
9
+
10
def signal_group(sp: "trio.Process", sig: int) -> None:
    """Send a signal to the process group that ``sp`` belongs to.

    Args:
        sp: Process whose process group should receive the signal. Only its
            ``pid`` attribute is read.
        sig: Signal number, passed through to ``os.killpg``.

    Raises:
        AssertionError: If ``sp`` is in the caller's own process group, since
            signalling that group would also signal the calling process.
        ProcessLookupError: May propagate from ``os.getpgid`` or
            ``os.killpg`` when the process or group no longer exists.
    """
    gid = os.getpgid(sp.pid)
    # Compare against our own *process group* id. os.getgid() is the user
    # (permission) group id, an unrelated number, so comparing with it never
    # protected against signalling ourselves. Raised explicitly so the guard
    # is not stripped when Python runs with -O.
    if gid == os.getpgrp():
        raise AssertionError(
            f"Refusing to signal process group {gid}: it is our own process group."
        )
    os.killpg(gid, sig)
15
+
16
+
17
async def interrupt_wait_and_kill(sp: "trio.Process", delay: float = 0.1) -> None:
    """Stop ``sp``: interrupt its process group, then kill it if needed.

    After a scheduler checkpoint, a process that already has a return code
    is left alone. Otherwise its pipes are closed, SIGINT is sent to its
    process group, and it is polled up to ten times with randomized,
    growing sleeps in between. If it still has no return code, SIGKILL is
    sent to the group and the process is awaited for at most ``delay``
    seconds.

    Raises:
        ValueError: If the process still has no return code after all of
            the above.
    """
    await trio.lowlevel.checkpoint()
    if sp.returncode is not None:
        return

    try:
        # In case the subprocess forked. Python might hang if you don't close
        # all pipes.
        for stream in (sp.stdout, sp.stderr, sp.stdin):
            if stream:
                await stream.aclose()
        signal_group(sp, signal.SIGINT)
        for attempt in range(10):
            if sp.poll() is not None:
                return
            pause = delay * 1.5**attempt * random.random()
            await trio.sleep(pause)
    except ProcessLookupError:  # pragma: no cover
        # This is incredibly hard to trigger reliably, because it only happens
        # if the process exits at exactly the wrong time.
        pass

    if sp.returncode is None:
        try:
            signal_group(sp, signal.SIGKILL)
        except ProcessLookupError:
            pass

    with trio.move_on_after(delay):
        await sp.wait()

    if sp.returncode is not None:
        return
    raise ValueError(
        f"Could not kill subprocess with pid {sp.pid}. Something has gone seriously wrong."
    )
+ )