PyPI - shrinkray - Versions diffs - 25.12.27.1__py3-none-any.whl → 25.12.27.3__py3-none-any.whl - Mend

shrinkray 25.12.27.1__py3-none-any.whl → 25.12.27.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (21)

shrinkray/__main__.py +25 -11
shrinkray/passes/bytes.py +8 -7
shrinkray/passes/definitions.py +3 -67
shrinkray/passes/genericlanguages.py +14 -10
shrinkray/passes/json.py +2 -2
shrinkray/passes/sat.py +2 -7
shrinkray/problem.py +257 -11
shrinkray/reducer.py +9 -2
shrinkray/state.py +199 -67
shrinkray/subprocess/client.py +2 -0
shrinkray/subprocess/protocol.py +8 -0
shrinkray/subprocess/worker.py +67 -17
shrinkray/tui.py +114 -92
shrinkray/validation.py +403 -0
{shrinkray-25.12.27.1.dist-info → shrinkray-25.12.27.3.dist-info}/METADATA +1 -28
shrinkray-25.12.27.3.dist-info/RECORD +34 -0
shrinkray-25.12.27.1.dist-info/RECORD +0 -33
{shrinkray-25.12.27.1.dist-info → shrinkray-25.12.27.3.dist-info}/WHEEL +0 -0
{shrinkray-25.12.27.1.dist-info → shrinkray-25.12.27.3.dist-info}/entry_points.txt +0 -0
{shrinkray-25.12.27.1.dist-info → shrinkray-25.12.27.3.dist-info}/licenses/LICENSE +0 -0
{shrinkray-25.12.27.1.dist-info → shrinkray-25.12.27.3.dist-info}/top_level.txt +0 -0

shrinkray/__main__.py CHANGED Viewed

@@ -17,18 +17,19 @@ from shrinkray.cli import (
     validate_command,
     validate_ui,
 )
+from shrinkray.formatting import determine_formatter_command
 from shrinkray.passes.clangdelta import (
     C_FILE_EXTENSIONS,
     ClangDelta,
     find_clang_delta,
 )
-from shrinkray.problem import InvalidInitialExample
 from shrinkray.state import (
     ShrinkRayDirectoryState,
     ShrinkRayState,
     ShrinkRayStateSingleFile,
 )
 from shrinkray.ui import BasicUI, ShrinkRayUI
+from shrinkray.validation import run_validation
 from shrinkray.work import Volume
@@ -39,12 +40,9 @@ async def run_shrink_ray(
     """Run the shrink ray reduction process."""
     async with trio.open_nursery() as nursery:
         problem = state.problem
-        try:
-            await problem.setup()
-        except* InvalidInitialExample as excs:
-            assert len(excs.exceptions) == 1
-            (e,) = excs.exceptions
-            await state.report_error(e)
+        # Validation runs before run_shrink_ray is called, so setup() should
+        # always succeed. If it doesn't, there's a bug and we want it to propagate.
+        await problem.setup()
         reducer = state.reducer
@@ -273,6 +271,26 @@ def main(
     if not backup:
         backup = filename + os.extsep + "bak"
+    # Run initial validation before any state setup
+    # This validates the interestingness test and formatter with proper output streaming
+    formatter_command = None
+    if not os.path.isdir(filename) and formatter.lower() != "none":
+        formatter_command = determine_formatter_command(formatter, filename)
+    validation_result = run_validation(
+        file_path=filename,
+        test=test,
+        input_type=input_type,
+        in_place=in_place,
+        formatter_command=formatter_command,
+    )
+    if not validation_result.success:
+        print(f"\nError: {validation_result.error_message}", file=sys.stderr)
+        sys.exit(1)
+    print("\nStarting reduction...", file=sys.stderr, flush=True)
     state_kwargs: dict[str, Any] = {
         "input_type": input_type,
         "in_place": in_place,
@@ -307,8 +325,6 @@ def main(
         state = ShrinkRayDirectoryState(initial=initial, **state_kwargs)
-        trio.run(state.check_formatter)
     else:
         try:
             os.remove(backup)
@@ -323,8 +339,6 @@ def main(
         state = ShrinkRayStateSingleFile(initial=initial, **state_kwargs)
-        trio.run(state.check_formatter)
     if ui_type == UIType.textual:
         from shrinkray.tui import run_textual_ui

shrinkray/passes/bytes.py CHANGED Viewed

@@ -24,8 +24,8 @@ from collections.abc import Sequence
 from attrs import define
-from shrinkray.passes.definitions import Format, ReductionProblem
 from shrinkray.passes.patching import Cuts, Patches, apply_patches
+from shrinkray.problem import Format, ReductionProblem
 @define(frozen=True)
@@ -739,12 +739,13 @@ async def line_sorter(problem: ReductionProblem[bytes]):
     while i < len(lines):
         j = i
         while j > 0:
-            u = lines[j - 1]
-            v = lines[j]
-            if v + u < u + v:
-                attempt = list(lines)
-                attempt[j - 1], attempt[j] = attempt[j], attempt[j - 1]
-                if not await problem.is_interesting(b"\n".join(attempt)):
+            attempt = list(lines)
+            attempt[j - 1], attempt[j] = attempt[j], attempt[j - 1]
+            new_test_case = b"\n".join(attempt)
+            if problem.sort_key(new_test_case) < problem.sort_key(
+                problem.current_test_case
+            ):
+                if not await problem.is_interesting(new_test_case):
                     break
                 else:
                     j -= 1

shrinkray/passes/definitions.py CHANGED Viewed

@@ -4,20 +4,20 @@ This module defines the core type aliases and abstractions for reduction:
 - ReductionPass[T]: A function that attempts to reduce a test case
 - ReductionPump[T]: A function that may temporarily increase test case size
-- Format[S, T]: A bidirectional transformation between types
 - compose(): Combines a Format with a pass to work on a different type
 These abstractions enable format-agnostic reduction: the same pass
 (e.g., "delete duplicate elements") can work on bytes, lines, tokens,
 JSON arrays, or any other sequence-like type.
+Note: Format, ParseError, and DumpError are defined in shrinkray.problem.
 """
-from abc import ABC, abstractmethod
 from collections.abc import Awaitable, Callable
 from functools import wraps
 from typing import TypeVar
-from shrinkray.problem import ReductionProblem
+from shrinkray.problem import Format, ParseError, ReductionProblem
 S = TypeVar("S")
@@ -36,70 +36,6 @@ ReductionPass = Callable[[ReductionProblem[T]], Awaitable[None]]
 ReductionPump = Callable[[ReductionProblem[T]], Awaitable[T]]
-class ParseError(Exception):
-    """Raised when a Format cannot parse its input."""
-    pass
-class DumpError(Exception):
-    """Raised when a Format cannot serialize its output.
-    This occurs because not all internal representations map to valid
-    output in the target format. For example, a reduction might create
-    an invalid AST structure that cannot be converted back to source code.
-    """
-    pass
-class Format[S, T](ABC):
-    """A bidirectional transformation between two types.
-    Formats enable format-agnostic passes by abstracting the
-    parse/serialize cycle. For example:
-    - Split(b"\\n"): bytes <-> list[bytes] (lines)
-    - Tokenize(): bytes <-> list[bytes] (tokens)
-    - JSON: bytes <-> Any (Python objects)
-    - DimacsCNF: bytes <-> list[list[int]] (SAT clauses)
-    A Format must satisfy the round-trip property:
-        dumps(parse(x)) should be equivalent to x
-        (possibly with normalization)
-    Example usage:
-        # Delete duplicate lines
-        compose(Split(b"\\n"), delete_duplicates)
-        # Reduce integer literals in source code
-        compose(IntegerFormat(), reduce_integer)
-    """
-    @property
-    def name(self) -> str:
-        """Human-readable name for this format, used in pass names."""
-        return repr(self)
-    @abstractmethod
-    def parse(self, input: S) -> T:
-        """Parse input into the target type. Raises ParseError on failure."""
-        ...
-    def is_valid(self, input: S) -> bool:
-        """Check if input can be parsed by this format."""
-        try:
-            self.parse(input)
-            return True
-        except ParseError:
-            return False
-    @abstractmethod
-    def dumps(self, input: T) -> S:
-        """Serialize the target type back to the source type."""
-        ...
 def compose(format: Format[S, T], reduction_pass: ReductionPass[T]) -> ReductionPass[S]:
     """Wrap a reduction pass to work through a Format transformation.

shrinkray/passes/genericlanguages.py CHANGED Viewed

@@ -3,7 +3,7 @@ Module of reduction passes designed for "things that look like programming langu
 """
 import re
-from collections.abc import Callable, Sized
+from collections.abc import Callable
 from functools import wraps
 from string import ascii_lowercase, ascii_uppercase
 from typing import AnyStr
@@ -12,9 +12,15 @@ import trio
 from attr import define
 from shrinkray.passes.bytes import ByteReplacement, delete_intervals
-from shrinkray.passes.definitions import Format, ParseError, ReductionPass
+from shrinkray.passes.definitions import ReductionPass
 from shrinkray.passes.patching import PatchApplier, Patches, apply_patches
-from shrinkray.problem import BasicReductionProblem, ReductionProblem
+from shrinkray.problem import (
+    BasicReductionProblem,
+    Format,
+    ParseError,
+    ReductionProblem,
+    sort_key_for_initial,
+)
 from shrinkray.work import NotFound
@@ -240,10 +246,6 @@ async def simplify_brackets(problem: ReductionProblem[bytes]) -> None:
 IDENTIFIER = re.compile(rb"(\b[A-Za-z][A-Za-z0-9_]*\b)|([0-9]+)")
-def shortlex[T: Sized](s: T) -> tuple[int, T]:
-    return (len(s), s)
 async def normalize_identifiers(problem: ReductionProblem[bytes]) -> None:
     """Replace identifiers with shorter alternatives.
@@ -261,8 +263,10 @@ async def normalize_identifiers(problem: ReductionProblem[bytes]) -> None:
                 replacements.add(c)
                 break
-    replacements = sorted(replacements, key=shortlex)
-    targets = sorted(identifiers, key=shortlex, reverse=True)
+    sort_key = sort_key_for_initial(problem.current_test_case)
+    replacements = sorted(replacements, key=sort_key)
+    targets = sorted(identifiers, key=sort_key, reverse=True)
     # TODO: This could use better parallelisation.
     for t in targets:
@@ -272,7 +276,7 @@ async def normalize_identifiers(problem: ReductionProblem[bytes]) -> None:
             continue
         async def can_replace(r):
-            if shortlex(r) >= shortlex(t):
+            if sort_key(r) >= sort_key(t):
                 return False
             attempt = pattern.sub(r, source)
             assert attempt != source

shrinkray/passes/json.py CHANGED Viewed

@@ -4,9 +4,9 @@ from typing import Any
 from attrs import define
-from shrinkray.passes.definitions import Format, ParseError, ReductionPass
+from shrinkray.passes.definitions import ReductionPass
 from shrinkray.passes.patching import Patches, apply_patches
-from shrinkray.problem import ReductionProblem
+from shrinkray.problem import Format, ParseError, ReductionProblem
 def is_json(s: bytes) -> bool:

shrinkray/passes/sat.py CHANGED Viewed

@@ -1,15 +1,10 @@
 from collections import Counter, defaultdict
 from collections.abc import Callable, Iterable, Iterator
-from shrinkray.passes.definitions import (
-    DumpError,
-    Format,
-    ParseError,
-    ReductionPass,
-)
+from shrinkray.passes.definitions import ReductionPass
 from shrinkray.passes.patching import Conflict, SetPatches, apply_patches
 from shrinkray.passes.sequences import delete_elements
-from shrinkray.problem import ReductionProblem
+from shrinkray.problem import DumpError, Format, ParseError, ReductionProblem
 Clause = list[int]

shrinkray/problem.py CHANGED Viewed

@@ -12,12 +12,13 @@ the details of caching, parallelism, and state management.
 """
 import hashlib
+import string
 import time
 from abc import ABC, abstractmethod
 from collections.abc import Awaitable, Callable, Sized
 from datetime import timedelta
+from functools import total_ordering
 from typing import (
-    TYPE_CHECKING,
     Any,
     Protocol,
     TypeVar,
@@ -29,12 +30,10 @@ import trio
 from attrs import define
 from humanize import naturalsize, precisedelta
+from shrinkray.formatting import try_decode
 from shrinkray.work import WorkContext
-if TYPE_CHECKING:
-    from shrinkray.passes.definitions import Format
 S = TypeVar("S")
 T = TypeVar("T")
@@ -71,9 +70,196 @@ def shortlex[SizedT: Sized](value: SizedT) -> tuple[int, SizedT]:
     return (len(value), value)
-def default_sort_key(value: Any):
-    if isinstance(value, str | bytes):
+@total_ordering
+class LazyChainedSortKey:
+    """A comparison key that lazily evaluates a chain of comparison functions.
+    This class provides an ordering that compares values by applying a sequence
+    of functions in order. The first function that produces different values
+    for two inputs determines the ordering. If all functions return equal
+    values, the inputs are considered equal.
+    This is used to implement the natural ordering for strings, which compares
+    by length, then average squared line length, then number of lines, etc.
+    The "lazy" aspect is that comparison functions are only evaluated until
+    one returns different values, avoiding unnecessary computation.
+    """
+    def __init__(self, functions: list[Callable[[T], Any]], value: T):
+        self.functions = functions
+        self.value = value
+    def __eq__(self, other):
+        if not isinstance(other, LazyChainedSortKey):
+            return NotImplemented
+        assert len(self.functions) == len(other.functions)
+        return self.value == other.value
+    def __lt__(self, other):
+        if self == other:
+            return False
+        if not isinstance(other, LazyChainedSortKey):
+            return NotImplemented
+        for f in self.functions:
+            self_key = f(self.value)
+            other_key = f(other.value)
+            if self_key < other_key:
+                return True
+            elif self_key > other_key:
+                return False
+        # All comparison functions returned equal values for different inputs.
+        # This shouldn't happen with the current functions (natural_string_lex
+        # compares character-by-character) but if it does, neither is less.
+        return False
+# Natural character ordering: whitespace < digits < lowercase < uppercase.
+# Characters not in this string are sorted by ord() after all known characters.
+NATURAL_CHARACTER_ORDER = (
+    string.whitespace + string.digits + string.ascii_lowercase + string.ascii_uppercase
+)
+NATURAL_CHARACTER_ORDER_INDEX = {s: i for i, s in enumerate(NATURAL_CHARACTER_ORDER)}
+def character_index(c: str) -> int:
+    """Return the sorting index for a character in natural ordering.
+    Characters in NATURAL_CHARACTER_ORDER get their position in that string.
+    Unknown characters (punctuation, unicode, etc.) sort after all known
+    characters, ordered by their Unicode code point.
+    """
+    return NATURAL_CHARACTER_ORDER_INDEX.get(c, len(NATURAL_CHARACTER_ORDER) + ord(c))
+def natural_string_lex(s: str) -> list[int]:
+    """Convert a string to a list of character indices for lexicographic comparison.
+    This transforms the string so that comparing the resulting lists gives
+    the natural character ordering (whitespace < digits < lowercase < uppercase).
+    """
+    return list(map(character_index, s))
+# The chain of comparison functions used for natural string ordering.
+# Each function is tried in sequence; the first that differs determines order.
+#
+# 1. Total length - shorter strings are always preferred
+# 2. Average squared line length - penalizes very long lines, preferring balanced code
+#    Formula: sum(len(line)²) / count(lines)²
+# 3. Number of lines - fewer lines is better (after accounting for balance)
+# 4. List of line lengths - lexicographically compare line length sequences
+# 5. Natural character order - whitespace < digits < lowercase < uppercase
+NATURAL_ORDERING_FUNCTIONS: list[Callable[[str], Any]] = [
+    len,
+    lambda s: sum(len(line) ** 2 for line in s.split("\n")) / len(s.split("\n")) ** 2,
+    lambda s: len(s.splitlines()),
+    lambda s: list(map(len, s.splitlines())),
+    natural_string_lex,
+]
+def natural_key(s: str) -> LazyChainedSortKey:
+    """Return a comparison key for natural string ordering.
+    Natural ordering uses a chain of heuristics to determine which string
+    is "smaller" (more reduced). This is designed to produce human-readable
+    minimal test cases with balanced line lengths and natural character choices.
+    See NATURAL_ORDERING_FUNCTIONS for the complete ordering criteria.
+    """
+    return LazyChainedSortKey(functions=NATURAL_ORDERING_FUNCTIONS, value=s)
+def sort_key_for_initial(initial: Any) -> Callable[[Any], Any]:
+    """Create a sort key function appropriate for the given initial value.
+    This examines the initial test case and returns a comparison function
+    that will be used to order all test cases during reduction.
+    For bytes:
+        - If decodable as text, uses natural ordering on the decoded string
+        - Falls back to shortlex for binary data that can't be decoded
+    For dicts:
+        - Orders by total size of values, then number of keys
+        - Then compares values for each key in order of largest-first
+    For other types:
+        - Falls back to natural ordering on repr()
+    The returned function can be used as a sort key or comparison key.
+    """
+    if isinstance(initial, bytes):
+        encoding, _ = try_decode(initial)
+        if encoding is None:
+            return shortlex
+        else:
+            def natural_for_encoding(b: bytes) -> Any:
+                try:
+                    s = b.decode(encoding)
+                    return (0, natural_key(s))
+                except UnicodeDecodeError:
+                    return (1, shortlex(b))
+            return natural_for_encoding
+    elif isinstance(initial, dict):
+        keys = sorted(initial, key=lambda k: shortlex(initial[k]), reverse=True)
+        natural_keys = {k: sort_key_for_initial(v) for k, v in initial.items()}
+        def dict_total_size(s):
+            return sum(len(v) for v in s.values())
+        def key_sort_key(k):
+            def f(x):
+                try:
+                    v = x[k]
+                except KeyError:
+                    return (0,)
+                else:
+                    return (1, natural_keys[k](v))
+            return f
+        functions = [
+            dict_total_size,
+            len,
+        ] + [key_sort_key(k) for k in keys]
+        def dict_sort_key(v):
+            return LazyChainedSortKey(
+                functions=functions,
+                value=v,
+            )
+        return dict_sort_key
+    else:
+        # We don't use this branch in the main app, but this
+        # function is also used in tests.
+        def fallback_sort_key(s):
+            return natural_key(repr(s))
+        return fallback_sort_key
+def default_sort_key(value: Any) -> Any:
+    """Return a comparison key for a value using type-appropriate ordering.
+    This is a simpler alternative to sort_key_for_initial that doesn't
+    examine the initial value to determine the best ordering.
+    - bytes: shortlex ordering (length, then lexicographic)
+    - str: natural ordering (length, line balance, character order)
+    - other: shortlex on repr()
+    Note: This really should return some sort of Comparable type, but Python
+    doesn't have a built-in protocol for that.
+    """
+    if isinstance(value, bytes):
         return shortlex(value)
+    elif isinstance(value, str):
+        return natural_key(value)
     else:
         return shortlex(repr(value))
@@ -85,6 +271,70 @@ def default_display(value: Any) -> str:
     return f"value of size {len(value)}"
+class ParseError(Exception):
+    """Raised when a Format cannot parse its input."""
+    pass
+class DumpError(Exception):
+    """Raised when a Format cannot serialize its output.
+    This occurs because not all internal representations map to valid
+    output in the target format. For example, a reduction might create
+    an invalid AST structure that cannot be converted back to source code.
+    """
+    pass
+class Format[S, T](ABC):
+    """A bidirectional transformation between two types.
+    Formats enable format-agnostic passes by abstracting the
+    parse/serialize cycle. For example:
+    - Split(b"\\n"): bytes <-> list[bytes] (lines)
+    - Tokenize(): bytes <-> list[bytes] (tokens)
+    - JSON: bytes <-> Any (Python objects)
+    - DimacsCNF: bytes <-> list[list[int]] (SAT clauses)
+    A Format must satisfy the round-trip property:
+        dumps(parse(x)) should be equivalent to x
+        (possibly with normalization)
+    Example usage:
+        # Delete duplicate lines
+        compose(Split(b"\\n"), delete_duplicates)
+        # Reduce integer literals in source code
+        compose(IntegerFormat(), reduce_integer)
+    """
+    @property
+    def name(self) -> str:
+        """Human-readable name for this format, used in pass names."""
+        return repr(self)
+    @abstractmethod
+    def parse(self, input: S) -> T:
+        """Parse input into the target type. Raises ParseError on failure."""
+        ...
+    def is_valid(self, input: S) -> bool:
+        """Check if input can be parsed by this format."""
+        try:
+            self.parse(input)
+            return True
+        except ParseError:
+            return False
+    @abstractmethod
+    def dumps(self, input: T) -> S:
+        """Serialize the target type back to the source type."""
+        ...
 def default_size(value: Any) -> int:
     try:
         return len(value)
@@ -182,9 +432,7 @@ class ReductionProblem[T](ABC):
         # Cache of View objects for each Format, to avoid re-parsing
         self.__view_cache: dict[Any, ReductionProblem[Any]] = {}
-    def view(
-        self, format: "Format[T, S] | type[Format[T, S]]"
-    ) -> "ReductionProblem[S]":
+    def view(self, format: Format[T, S] | type[Format[T, S]]) -> "ReductionProblem[S]":
         """Create a view of this problem through a Format.
         A View wraps this problem, parsing the current test case through
@@ -481,8 +729,6 @@ class View[S, T](ReductionProblem[T]):
         return self.__current
     async def is_interesting(self, test_case: T) -> bool:
-        from shrinkray.passes.definitions import DumpError
         try:
             return await self.__problem.is_interesting(self.__dump(test_case))
         except DumpError:

shrinkray/reducer.py CHANGED Viewed

@@ -49,7 +49,12 @@ from shrinkray.passes.patching import PatchApplier, Patches
 from shrinkray.passes.python import PYTHON_PASSES, is_python
 from shrinkray.passes.sat import SAT_PASSES, DimacsCNF
 from shrinkray.passes.sequences import block_deletion, delete_duplicates
-from shrinkray.problem import ReductionProblem, ReductionStats, shortlex
+from shrinkray.problem import (
+    ReductionProblem,
+    ReductionStats,
+    shortlex,
+    sort_key_for_initial,
+)
 @define
@@ -531,6 +536,8 @@ class KeyProblem(ReductionProblem[bytes]):
         self.base_problem = base_problem
         self.applier = applier
         self.key = key
+        # Use the appropriate sort key for this value (natural for text, shortlex for binary)
+        self._sort_key_fn = sort_key_for_initial(self.current_test_case)
     @property
     def current_test_case(self) -> bytes:
@@ -547,7 +554,7 @@ class KeyProblem(ReductionProblem[bytes]):
         return len(test_case)
     def sort_key(self, test_case: bytes) -> Any:
-        return shortlex(test_case)
+        return self._sort_key_fn(test_case)
     def display(self, value: bytes) -> str:
         return repr(value)

shrinkray 25.12.27.1__py3-none-any.whl → 25.12.27.3__py3-none-any.whl

shrinkray 25.12.27.1__py3-none-any.whl → 25.12.27.3__py3-none-any.whl