shrinkray 0.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
shrinkray/reducer.py ADDED
@@ -0,0 +1,430 @@
1
+ from abc import ABC, abstractmethod
2
+ from collections.abc import Generator
3
+ from contextlib import contextmanager
4
+ from typing import Any, Generic, Iterable, Optional, TypeVar
5
+
6
+ import attrs
7
+ import trio
8
+ from attrs import define
9
+
10
+ from shrinkray.passes.bytes import (
11
+ Split,
12
+ Tokenize,
13
+ debracket,
14
+ delete_byte_spans,
15
+ hollow,
16
+ lexeme_based_deletions,
17
+ lift_braces,
18
+ lower_bytes,
19
+ lower_individual_bytes,
20
+ remove_indents,
21
+ remove_whitespace,
22
+ replace_space_with_newlines,
23
+ short_deletions,
24
+ standard_substitutions,
25
+ )
26
+ from shrinkray.passes.clangdelta import C_FILE_EXTENSIONS, ClangDelta, clang_delta_pumps
27
+ from shrinkray.passes.definitions import Format, ReductionPass, ReductionPump, compose
28
+ from shrinkray.passes.genericlanguages import (
29
+ combine_expressions,
30
+ cut_comment_like_things,
31
+ merge_adjacent_strings,
32
+ normalize_identifiers,
33
+ reduce_integer_literals,
34
+ replace_falsey_with_zero,
35
+ simplify_brackets,
36
+ )
37
+ from shrinkray.passes.json import JSON, JSON_PASSES
38
+ from shrinkray.passes.patching import PatchApplier, Patches
39
+ from shrinkray.passes.python import PYTHON_PASSES, is_python
40
+ from shrinkray.passes.sat import SAT_PASSES, DimacsCNF
41
+ from shrinkray.passes.sequences import block_deletion, delete_duplicates
42
+ from shrinkray.problem import ReductionProblem, shortlex
43
+
44
# Type variables for the generic reducer machinery below.
S = TypeVar("S")
T = TypeVar("T")
46
+
47
+
48
@define
class Reducer(Generic[T], ABC):
    """Abstract base class for reducers.

    A reducer drives reduction passes against a single `ReductionProblem`
    target. Subclasses implement `run` to define the overall strategy.
    """

    # The problem currently being reduced. Temporarily swapped out by
    # `backtrack` and restored afterwards.
    target: ReductionProblem[T]

    @contextmanager
    def backtrack(self, restart: T) -> Generator[None, None, None]:
        """Temporarily replace `target` with a backtracked copy seeded
        from `restart`.

        The original target is always restored on exit, even if the body
        raises.
        """
        saved = self.target
        self.target = saved.backtrack(restart)
        try:
            yield
        finally:
            self.target = saved

    @abstractmethod
    async def run(self) -> None:
        """Run this reducer to completion."""
        ...

    @property
    def status(self) -> str:
        """Human-readable description of current activity (empty by default)."""
        return ""
67
+
68
+
69
@define
class BasicReducer(Reducer[T]):
    """Reducer that repeatedly cycles through a fixed list of reduction
    passes (and optional pumps) until a whole cycle makes no progress."""

    reduction_passes: Iterable[ReductionPass[T]]
    pumps: Iterable[ReductionPump[T]] = ()
    status: str = "Starting up"

    def __attrs_post_init__(self) -> None:
        # Materialise the passes so they can be iterated more than once.
        self.reduction_passes = list(self.reduction_passes)

    async def run_pass(self, rp: ReductionPass[T]) -> None:
        await rp(self.target)

    async def run(self) -> None:
        await self.target.setup()

        while True:
            before = self.target.current_test_case

            for rp in self.reduction_passes:
                self.status = f"Running reduction pass {rp.__name__}"
                await self.run_pass(rp)

            for pump in self.pumps:
                self.status = f"Pumping with {pump.__name__}"
                pumped = await pump(self.target)
                if pumped == self.target.current_test_case:
                    continue
                # The pump produced something new: re-run every pass against
                # the pumped variant before returning to the main target.
                with self.backtrack(pumped):
                    for rp in self.reduction_passes:
                        self.status = f"Running reduction pass {rp.__name__} under pump {pump.__name__}"
                        await self.run_pass(rp)

            # A full cycle changed nothing, so we have reached a fixpoint.
            if before == self.target.current_test_case:
                return
99
+
100
+
101
class RestartPass(Exception):
    """Control-flow exception signalling that a pass should be restarted.

    NOTE(review): not raised or caught anywhere in this module — presumably
    used by pass implementations elsewhere; confirm before removing.
    """
103
+
104
+
105
@define
class ShrinkRay(Reducer[bytes]):
    """The main byte-oriented reducer.

    Passes are grouped by expected usefulness (initial cuts, "great", "ok",
    and "last ditch") and scheduled adaptively: cheap, high-value passes run
    first, and lower-value groups only run once the better ones stop making
    progress.
    """

    # Optional wrapper around the clang_delta binary; when present, enables
    # C/C++-specific reduction pumps (see `pumps`).
    clang_delta: Optional[ClangDelta] = None

    # Bookkeeping used only to render `status` for progress display.
    current_reduction_pass: Optional[ReductionPass[bytes]] = None
    current_pump: Optional[ReductionPump[bytes]] = None

    # Set once the "ok" passes have been unlocked; after that they run on
    # every call to `run_some_passes` (see that method for the logic).
    unlocked_ok_passes: bool = False

    # Passes used by `initial_cut` to rapidly shrink a large test case
    # before the main loop starts.
    initial_cuts: list[ReductionPass[bytes]] = attrs.Factory(
        lambda: [
            cut_comment_like_things,
            hollow,
            compose(Split(b"\n"), delete_duplicates),
            compose(Split(b"\n"), block_deletion(10, 100)),
            lift_braces,
            remove_indents,
            remove_whitespace,
        ]
    )

    # Passes that reliably give good reductions; run on every cycle.
    great_passes: list[ReductionPass[bytes]] = attrs.Factory(
        lambda: [
            compose(Split(b"\n"), delete_duplicates),
            compose(Split(b"\n"), block_deletion(1, 10)),
            compose(Split(b";"), block_deletion(1, 10)),
            remove_indents,
            hollow,
            lift_braces,
            delete_byte_spans,
            debracket,
        ]
    )

    # Passes that are useful but slower / less often productive; only run
    # once the great passes stop making progress.
    ok_passes: list[ReductionPass[bytes]] = attrs.Factory(
        lambda: [
            compose(Split(b"\n"), block_deletion(11, 20)),
            remove_indents,
            remove_whitespace,
            compose(Tokenize(), block_deletion(1, 20)),
            reduce_integer_literals,
            replace_falsey_with_zero,
            combine_expressions,
            merge_adjacent_strings,
            lexeme_based_deletions,
            short_deletions,
            normalize_identifiers,
        ]
    )

    # Expensive or rarely-productive passes, tried only when nothing else
    # is making progress.
    last_ditch_passes: list[ReductionPass[bytes]] = attrs.Factory(
        lambda: [
            compose(Split(b"\n"), block_deletion(21, 100)),
            replace_space_with_newlines,
            delete_byte_spans,
            lower_bytes,
            lower_individual_bytes,
            simplify_brackets,
            standard_substitutions,
            # This is in last ditch because it's probably not useful
            # to run it more than once.
            cut_comment_like_things,
        ]
    )

    def __attrs_post_init__(self) -> None:
        # Prepend format-specific passes when the initial test case matches
        # a recognised format; prepending gives them priority over the
        # generic byte-level passes.
        if is_python(self.target.current_test_case):
            self.great_passes[:0] = PYTHON_PASSES
            self.initial_cuts[:0] = PYTHON_PASSES
        self.register_format_specific_pass(JSON, JSON_PASSES)
        self.register_format_specific_pass(
            DimacsCNF,
            SAT_PASSES,
        )

    def register_format_specific_pass(
        self, format: Format[bytes, T], passes: Iterable[ReductionPass[T]]
    ) -> None:
        """If the current test case is valid under `format`, prepend the
        given passes (lifted through the format) to the great and initial
        pass lists."""
        if format.is_valid(self.target.current_test_case):
            composed = [compose(format, p) for p in passes]
            self.great_passes[:0] = composed
            self.initial_cuts[:0] = composed

    @property
    def pumps(self) -> Iterable[ReductionPump[bytes]]:
        """Reduction pumps to try: clang_delta-based ones when available,
        otherwise none."""
        if self.clang_delta is None:
            return ()
        else:
            return clang_delta_pumps(self.clang_delta)

    @property
    def status(self) -> str:
        """Progress string combining the current pass and/or pump."""
        if self.current_pump is None:
            if self.current_reduction_pass is not None:
                return f"Running reduction pass {self.current_reduction_pass.__name__}"
            else:
                return "Selecting reduction pass"
        else:
            if self.current_reduction_pass is not None:
                return f"Running reduction pass {self.current_reduction_pass.__name__} under pump {self.current_pump.__name__}"
            else:
                return f"Running reduction pump {self.current_pump.__name__}"

    async def run_pass(self, rp: ReductionPass[bytes]) -> None:
        """Run a single pass, tracking it in `current_reduction_pass` for
        status display. Only one pass may be active at a time."""
        try:
            assert self.current_reduction_pass is None
            self.current_reduction_pass = rp
            await rp(self.target)
        finally:
            self.current_reduction_pass = None

    async def pump(self, rp: ReductionPump[bytes]) -> None:
        """Run a reduction pump.

        A pump may *grow* the test case to open up new reduction
        opportunities. If it produced something new, re-run the pass groups
        against the pumped variant (via backtracking), stopping as soon as
        the result beats the pre-pump test case under the problem's sort
        key.
        """
        try:
            assert self.current_pump is None
            self.current_pump = rp
            pumped = await rp(self.target)
            current = self.target.current_test_case
            if pumped == current:
                return
            with self.backtrack(pumped):
                for f in [
                    self.run_great_passes,
                    self.run_ok_passes,
                    self.run_last_ditch_passes,
                ]:
                    await f()
                    # Stop escalating as soon as we have improved on the
                    # pre-pump test case.
                    if self.target.sort_key(
                        self.target.current_test_case
                    ) < self.target.sort_key(current):
                        break

        finally:
            self.current_pump = None

    async def run_great_passes(self) -> None:
        """Run every pass in the "great" group once."""
        for rp in self.great_passes:
            await self.run_pass(rp)

    async def run_ok_passes(self) -> None:
        """Run every pass in the "ok" group once."""
        for rp in self.ok_passes:
            await self.run_pass(rp)

    async def run_last_ditch_passes(self) -> None:
        """Run every pass in the "last ditch" group once."""
        for rp in self.last_ditch_passes:
            await self.run_pass(rp)

    async def run_some_passes(self) -> None:
        """Run pass groups in order of expected usefulness, escalating to
        the next group only while the previous ones made no progress (the
        "ok" group stays unlocked permanently once reached)."""
        prev = self.target.current_test_case
        await self.run_great_passes()
        # Until unlocked, any progress from the great passes means we stop
        # here and let the caller loop back to them.
        if prev != self.target.current_test_case and not self.unlocked_ok_passes:
            return
        self.unlocked_ok_passes = True
        await self.run_ok_passes()
        if prev != self.target.current_test_case:
            return
        await self.run_last_ditch_passes()

    async def initial_cut(self) -> None:
        """Rapid initial shrinking phase.

        Runs the `initial_cuts` passes, each supervised by a watcher task
        that cancels the pass once it stops being productive, and repeats
        until a whole sweep shrinks the test case by less than 1%.
        """
        while True:
            prev = self.target.current_size
            for rp in self.initial_cuts:
                async with trio.open_nursery() as nursery:

                    @nursery.start_soon
                    async def _() -> None:
                        """
                        Watcher task that cancels the current reduction pass as
                        soon as it stops looking like a good idea to keep running
                        it. Current criteria:

                        1. If it's been more than 5s since the last successful reduction.
                        2. If the reduction rate of the task has dropped under 50% of its
                           best so far.
                        """
                        iters = 0
                        initial_size = self.target.current_size
                        best_reduction_rate: float | None = None

                        while True:
                            iters += 1
                            # Total bytes removed since this pass started;
                            # divided by the number of 5s intervals below to
                            # get an average reduction rate.
                            deleted = initial_size - self.target.current_size

                            current = self.target.current_test_case
                            await trio.sleep(5)
                            rate = deleted / iters

                            if (
                                best_reduction_rate is None
                                or rate > best_reduction_rate
                            ):
                                best_reduction_rate = rate

                            assert best_reduction_rate is not None

                            # Cancel if the rate collapsed or nothing changed
                            # during the last 5 seconds.
                            if (
                                rate < 0.5 * best_reduction_rate
                                or current == self.target.current_test_case
                            ):
                                nursery.cancel_scope.cancel()
                                break

                    await self.run_pass(rp)
                    # Pass finished on its own: stop the watcher too.
                    nursery.cancel_scope.cancel()
            # Less than 1% shrinkage over a full sweep: good enough to move
            # on to the main reduction loop.
            if self.target.current_size >= 0.99 * prev:
                return

    async def run(self) -> None:
        """Top-level reduction strategy.

        First tries trivial results (empty, then single bytes), then the
        rapid initial cut, then loops the adaptive pass groups, falling
        back to pumps only when the passes reach a fixpoint.
        """
        await self.target.setup()

        # The empty test case is the best possible result.
        if await self.target.is_interesting(b""):
            return

        # NOTE(review): this assignment appears to be vestigial — `prev` is
        # unconditionally reassigned before first use below; confirm.
        prev = 0
        # Try a handful of representative single bytes; if any is
        # interesting, linear-scan for the smallest interesting byte value
        # and finish — a one-byte result cannot be improved further here.
        for c in [0, 1, ord(b"\n"), ord(b"0"), ord(b"z"), 255]:
            if await self.target.is_interesting(bytes([c])):
                for i in range(c):
                    if await self.target.is_interesting(bytes([i])):
                        break
                return

        await self.initial_cut()

        while True:
            prev = self.target.current_test_case
            await self.run_some_passes()
            if self.target.current_test_case != prev:
                continue
            # Passes are stuck: try the (expensive) pumps.
            for pump in self.pumps:
                await self.pump(pump)
            # Neither passes nor pumps changed anything: we are done.
            if self.target.current_test_case == prev:
                break
336
+
337
+
338
class UpdateKeys(Patches[dict[str, bytes], dict[str, bytes]]):
    """Patch type for dict-of-files problems.

    A patch is itself a dict mapping keys to replacement values; applying
    it overwrites those keys in the target. Combining patches merges them,
    with later patches winning on conflicting keys.
    """

    @property
    def empty(self) -> dict[str, bytes]:
        return {}

    def combine(self, *patches: dict[str, bytes]) -> dict[str, bytes]:
        merged: dict[str, bytes] = {}
        for patch in patches:
            merged.update(patch)
        return merged

    def apply(
        self, patch: dict[str, bytes], target: dict[str, bytes]
    ) -> dict[str, bytes]:
        updated = dict(target)
        updated.update(patch)
        return updated

    def size(self, patch: dict[str, bytes]) -> int:
        # Number of keys touched, not total bytes.
        return len(patch)
359
+
360
+
361
class KeyProblem(ReductionProblem[bytes]):
    """Presents one entry of a dict-of-files problem as a bytes problem.

    Interestingness checks are routed through a shared `PatchApplier`, so
    multiple `KeyProblem`s over different keys can shrink concurrently
    without trampling each other's updates.
    """

    def __init__(
        self,
        base_problem: ReductionProblem[dict[str, bytes]],
        applier: PatchApplier[dict[str, bytes], dict[str, bytes]],
        key: str,
    ):
        super().__init__(work=base_problem.work)
        self.base_problem = base_problem
        self.applier = applier
        self.key = key

    @property
    def current_test_case(self) -> bytes:
        """The value currently stored under our key in the base problem."""
        return self.base_problem.current_test_case[self.key]

    async def is_interesting(self, test_case: bytes) -> bool:
        """Propose replacing our key's value with `test_case`."""
        patch = {self.key: test_case}
        return await self.applier.try_apply_patch(patch)

    def size(self, test_case: bytes) -> int:
        return len(test_case)

    def sort_key(self, test_case: bytes) -> Any:
        # Shorter-then-lexicographic ordering, like the rest of shrinkray.
        return shortlex(test_case)

    def display(self, value: bytes) -> str:
        return repr(value)
388
+
389
+
390
@define
class DirectoryShrinkRay(Reducer[dict[str, bytes]]):
    """Reducer for multi-file test cases, represented as a dict mapping
    file names to their contents.

    Alternates between deleting whole files and shrinking the surviving
    files' contents until neither makes progress.
    """

    # Passed through to the per-file ShrinkRay instances for C/C++ files.
    clang_delta: Optional[ClangDelta] = None

    async def run(self):
        previous = None
        while self.target.current_test_case != previous:
            previous = self.target.current_test_case
            await self.delete_keys()
            await self.shrink_values()

    async def delete_keys(self):
        """Try deleting each file outright, largest content first."""
        snapshot = self.target.current_test_case
        ordered = sorted(
            snapshot,
            key=lambda k: (shortlex(snapshot[k]), shortlex(k)),
            reverse=True,
        )
        for k in ordered:
            # Re-read the current state each time so earlier successful
            # deletions are reflected in this attempt.
            attempt = dict(self.target.current_test_case)
            del attempt[k]
            await self.target.is_interesting(attempt)

    async def shrink_values(self):
        """Shrink every file's contents concurrently.

        Each key gets its own `KeyProblem` view and `ShrinkRay` instance;
        all of them share one `PatchApplier` so concurrent updates compose.
        """
        async with trio.open_nursery() as nursery:
            applier = PatchApplier(patches=UpdateKeys(), problem=self.target)
            for key in self.target.current_test_case.keys():
                subproblem = KeyProblem(
                    base_problem=self.target,
                    applier=applier,
                    key=key,
                )
                # Only hand clang_delta to reducers working on C-family files.
                if self.clang_delta is not None and any(
                    key.endswith(ext) for ext in C_FILE_EXTENSIONS
                ):
                    cd = self.clang_delta
                else:
                    cd = None

                nursery.start_soon(
                    ShrinkRay(clang_delta=cd, target=subproblem).run
                )
shrinkray/work.py ADDED
@@ -0,0 +1,217 @@
1
+ import heapq
2
+ from contextlib import asynccontextmanager
3
+ from enum import IntEnum
4
+ from itertools import islice
5
+ from random import Random
6
+ from typing import Awaitable, Callable, Optional, Sequence, TypeVar
7
+
8
+ import trio
9
+
10
+
11
class Volume(IntEnum):
    """Logging verbosity levels, ordered from quietest to loudest.

    Being an IntEnum, levels compare numerically, so e.g. a message at
    `debug` is "louder" than one at `normal`.
    """

    quiet = 0
    normal = 1
    verbose = 2
    debug = 3
16
+
17
+
18
# Type variables for the generic helpers below.
S = TypeVar("S")
T = TypeVar("T")


# NOTE(review): not referenced in this file — presumably the minimum
# interval (seconds) between progress "ticks" (cf. WorkContext.last_ticked);
# confirm at call sites.
TICK_FREQUENCY = 0.05
23
+
24
+
25
class WorkContext:
    """A grab bag of useful tools for 'doing work'. Manages randomness,
    logging, concurrency."""

    def __init__(
        self,
        random: Optional[Random] = None,
        parallelism: int = 1,
        volume: Volume = Volume.normal,
    ):
        # Deterministic by default: seeded RNG when none is supplied.
        self.random = random or Random(0)
        self.parallelism = parallelism
        self.volume = volume
        # Starts at -inf so the first tick always fires.
        # NOTE(review): not updated anywhere in this file — presumably
        # maintained by callers; confirm.
        self.last_ticked = float("-inf")

    @asynccontextmanager
    async def map(self, ls: Sequence[T], f: Callable[[T], Awaitable[S]]):
        """Lazy parallel map.

        Does a reasonable amount of fine tuning so that it doesn't race
        ahead of the current point of iteration and will generallly have
        prefetched at most as many values as you've already read. This
        is especially important for its use in implementing `find_first`,
        which we want to avoid doing redundant work when there are lots of
        reduction opportunities.

        Yields a receive channel producing f(x) for each x in ls, in order.
        """

        async with trio.open_nursery() as nursery:
            # Buffer slightly more than the parallelism so workers are not
            # blocked on a consumer that is about to read.
            send, receive = trio.open_memory_channel(self.parallelism + 1)

            @nursery.start_soon
            async def do_map():
                if self.parallelism > 1:
                    it = iter(ls)

                    # Compute the first value sequentially; the for/else
                    # detects an empty iterable.
                    # NOTE(review): on the empty path we return without
                    # closing `send`, unlike the other exit paths — a
                    # consumer that drains to exhaustion would block;
                    # confirm callers never hit this.
                    for x in it:
                        await send.send(await f(x))
                        break
                    else:
                        return

                    # Process in exponentially growing batches so the
                    # amount prefetched ramps up with how much has been
                    # consumed.
                    n = 2
                    while True:
                        values = list(islice(it, n))
                        if not values:
                            send.close()
                            return

                        async with parallel_map(
                            values, f, parallelism=min(self.parallelism, n)
                        ) as result:
                            async for v in result:
                                await send.send(v)

                        n *= 2
                else:
                    # No parallelism: plain sequential map.
                    for x in ls:
                        await send.send(await f(x))
                    send.close()

            # NOTE(review): unlike `filter`, the nursery is not cancelled
            # after the yield, so exiting this context mid-iteration waits
            # for (or deadlocks on) the producer — confirm callers always
            # drain or cancel from outside.
            yield receive

    @asynccontextmanager
    async def filter(self, ls: Sequence[T], f: Callable[[T], Awaitable[bool]]):
        """Lazily yield the elements of `ls` satisfying `f`, in order,
        evaluating `f` in parallel via `map`."""

        async def apply(x: T) -> tuple[T, bool]:
            return (x, await f(x))

        async with trio.open_nursery() as nursery:
            # Unbounded buffer: accepted elements never block the producer.
            send, receive = trio.open_memory_channel(float("inf"))

            @nursery.start_soon
            async def _():
                async with self.map(ls, apply) as results:
                    async for x, v in results:
                        if v:
                            await send.send(x)

            yield receive
            # Stop the background work once the consumer is done.
            nursery.cancel_scope.cancel()

    async def find_first_value(
        self, ls: Sequence[T], f: Callable[[T], Awaitable[bool]]
    ) -> T:
        """Returns the first element of `ls` that satisfies `f`, or
        raises `NotFound` if no such element exists.

        Will run in parallel if parallelism is enabled.
        """
        async with self.filter(ls, f) as filtered:
            async for x in filtered:
                return x
        raise NotFound()

    async def find_large_integer(self, f: Callable[[int], Awaitable[bool]]) -> int:
        """Finds a (hopefully large) integer n such that f(n) is True and f(n + 1)
        is False. Runs in O(log(n)).

        f(0) is assumed to be True and will not be checked. May not terminate unless
        f(n) is False for all sufficiently large n.
        """
        # We first do a linear scan over the small numbers and only start to do
        # anything intelligent if f(4) is true. This is because it's very hard to
        # win big when the result is small. If the result is 0 and we try 2 first
        # then we've done twice as much work as we needed to!
        for i in range(1, 5):
            if not await f(i):
                return i - 1

        # We now know that f(4) is true. We want to find some number for which
        # f(n) is *not* true.
        # lo is the largest number for which we know that f(lo) is true.
        lo = 4

        # Exponential probe upwards until we find some value hi such that f(hi)
        # is not true. Subsequently we maintain the invariant that hi is the
        # smallest number for which we know that f(hi) is not true.
        hi = 5
        while await f(hi):
            lo = hi
            hi *= 2

        # Now binary search until lo + 1 = hi. At that point we have f(lo) and not
        # f(lo + 1), as desired.
        while lo + 1 < hi:
            mid = (lo + hi) // 2
            if await f(mid):
                lo = mid
            else:
                hi = mid
        return lo

    def warn(self, msg: str) -> None:
        # NOTE(review): warn and note both report at Volume.normal —
        # possibly warn was meant to use a different level; confirm.
        self.report(msg, Volume.normal)

    def note(self, msg: str) -> None:
        self.report(msg, Volume.normal)

    def debug(self, msg: str) -> None:
        self.report(msg, Volume.debug)

    def report(self, msg: str, level: Volume) -> None:
        # Base implementation discards all messages.
        # NOTE(review): no overriding subclass is visible in this file —
        # presumably reporting is wired up elsewhere; confirm.
        return
167
+
168
+
169
class NotFound(Exception):
    """Raised by `WorkContext.find_first_value` when no element satisfies
    the predicate."""
171
+
172
+
173
@asynccontextmanager
async def parallel_map(
    ls: Sequence[T],
    f: Callable[[T], Awaitable[S]],
    parallelism: int,
):
    """Apply `f` to each element of `ls` with up to `parallelism` concurrent
    worker tasks, yielding a receive channel that produces the results in
    input order (despite out-of-order completion)."""
    send_out_values, receive_out_values = trio.open_memory_channel(parallelism)

    # Shared work queue of (index, element); reversed so that .pop() takes
    # items in their original order.
    work = list(enumerate(ls))
    work.reverse()

    # Min-heap of (index, result) used to re-order completions back into
    # input order before emitting them.
    result_heap = []

    async with trio.open_nursery() as nursery:
        results_ready = trio.Event()

        for _ in range(parallelism):

            @nursery.start_soon
            async def do_work():
                # Workers all share `work`; each pops the next pending item.
                while work:
                    i, x = work.pop()
                    result = await f(x)
                    heapq.heappush(result_heap, (i, result))
                    results_ready.set()

        @nursery.start_soon
        async def consolidate() -> None:
            # Index of the next result to emit.
            i = 0

            while work or result_heap:
                while not result_heap:
                    await results_ready.wait()
                assert result_heap
                j, x = result_heap[0]
                if j == i:
                    # Next expected result is at the top of the heap: emit it.
                    await send_out_values.send(x)
                    i = j + 1
                    heapq.heappop(result_heap)
                else:
                    # NOTE(review): trio.Event cannot be reset, so once set
                    # this wait returns immediately and this branch spins
                    # (with checkpoints) until result i arrives — confirm
                    # this busy-wait is acceptable here.
                    await results_ready.wait()
            # All results emitted: signal completion to the consumer.
            send_out_values.close()

        yield receive_out_values
        # Tear down any remaining tasks once the consumer is done.
        nursery.cancel_scope.cancel()
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright © 2023 David R. MacIver
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.