shrinkray 0.0.0__py3-none-any.whl → 25.12.26__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
shrinkray/state.py ADDED
@@ -0,0 +1,599 @@
1
+ """State management for shrink ray reduction sessions."""
2
+
3
+ import math
4
+ import os
5
+ import random
6
+ import shutil
7
+ import subprocess
8
+ import sys
9
+ import time
10
+ from abc import ABC, abstractmethod
11
+ from datetime import timedelta
12
+ from tempfile import TemporaryDirectory
13
+ from typing import Any
14
+
15
+ import humanize
16
+ import trio
17
+ from attrs import define
18
+
19
+ from shrinkray.passes.clangdelta import ClangDelta
20
+ from shrinkray.problem import (
21
+ BasicReductionProblem,
22
+ InvalidInitialExample,
23
+ ReductionProblem,
24
+ shortlex,
25
+ )
26
+ from shrinkray.reducer import DirectoryShrinkRay, Reducer, ShrinkRay
27
+ from shrinkray.work import Volume, WorkContext
28
+
29
+
30
class TimeoutExceededOnInitial(InvalidInitialExample):
    """Signals that the first run of the interestingness test was too slow.

    Attributes:
        runtime: How long the initial run took, in seconds.
        timeout: The configured timeout, in seconds, that the run exceeded.
    """

    def __init__(self, runtime: float, timeout: float) -> None:
        # Record the measurements first so they are available to callers that
        # want to build their own message (e.g. suggesting a new timeout).
        self.runtime, self.timeout = runtime, timeout
        message = f"Initial test call exceeded timeout of {timeout}s. Try raising or disabling timeout."
        super().__init__(message)
37
+
38
+
39
+ @define(slots=False)
40
+ class ShrinkRayState[TestCase](ABC):
41
+ input_type: Any # InputType from __main__
42
+ in_place: bool
43
+ test: list[str]
44
+ filename: str
45
+ timeout: float
46
+ base: str
47
+ parallelism: int
48
+ initial: TestCase
49
+ formatter: str
50
+ trivial_is_error: bool
51
+ seed: int
52
+ volume: Volume
53
+ clang_delta_executable: ClangDelta | None
54
+
55
+ first_call: bool = True
56
+ initial_exit_code: int | None = None
57
+ can_format: bool = True
58
+ formatter_command: list[str] | None = None
59
+
60
+ first_call_time: float | None = None
61
+
62
+ # Lazy imports to break circular dependencies:
63
+ # - shrinkray.process imports from shrinkray.work which imports from here
64
+ # - shrinkray.cli imports from here for state configuration
65
+ # These are cached after first import for performance.
66
+ _interrupt_wait_and_kill: Any = None
67
+ _InputType: Any = None # InputType enum from shrinkray.cli
68
+
69
+ # Stores the output from the last debug run
70
+ _last_debug_output: str = ""
71
+
72
+ def __attrs_post_init__(self):
73
+ self.is_interesting_limiter = trio.CapacityLimiter(max(self.parallelism, 1))
74
+ self.setup_formatter()
75
+
76
+ @abstractmethod
77
+ def setup_formatter(self): ...
78
+
79
+ @abstractmethod
80
+ def new_reducer(self, problem: ReductionProblem[TestCase]) -> Reducer[TestCase]: ...
81
+
82
+ @abstractmethod
83
+ async def write_test_case_to_file_impl(self, working: str, test_case: TestCase): ...
84
+
85
+ async def write_test_case_to_file(self, working: str, test_case: TestCase):
86
+ await self.write_test_case_to_file_impl(working, test_case)
87
+
88
+ async def run_script_on_file(
89
+ self, working: str, cwd: str, debug: bool = False
90
+ ) -> int:
91
+ # Lazy import to avoid circular dependency
92
+ if self._interrupt_wait_and_kill is None:
93
+ from shrinkray.process import interrupt_wait_and_kill
94
+
95
+ self._interrupt_wait_and_kill = interrupt_wait_and_kill
96
+ if self._InputType is None:
97
+ from shrinkray.cli import InputType
98
+
99
+ self._InputType = InputType
100
+
101
+ if not os.path.exists(working):
102
+ raise ValueError(f"No such file {working}")
103
+ if self.input_type.enabled(self._InputType.arg):
104
+ command = self.test + [working]
105
+ else:
106
+ command = self.test
107
+
108
+ kwargs: dict[str, Any] = dict(
109
+ universal_newlines=False,
110
+ preexec_fn=os.setsid,
111
+ cwd=cwd,
112
+ check=False,
113
+ )
114
+ if self.input_type.enabled(self._InputType.stdin) and not os.path.isdir(
115
+ working
116
+ ):
117
+ with open(working, "rb") as i:
118
+ kwargs["stdin"] = i.read()
119
+ else:
120
+ kwargs["stdin"] = b""
121
+
122
+ # For debug mode, use simpler approach to capture output
123
+ if debug:
124
+ kwargs["capture_stdout"] = True
125
+ kwargs["capture_stderr"] = True
126
+ start_time = time.time()
127
+ completed = await trio.run_process(command, **kwargs)
128
+ runtime = time.time() - start_time
129
+
130
+ if runtime >= self.timeout and self.first_call:
131
+ self.initial_exit_code = completed.returncode
132
+ self.first_call = False
133
+ raise TimeoutExceededOnInitial(
134
+ timeout=self.timeout,
135
+ runtime=runtime,
136
+ )
137
+
138
+ if self.first_call:
139
+ self.initial_exit_code = completed.returncode
140
+ self.first_call = False
141
+
142
+ # Store captured output
143
+ output_parts = []
144
+ if completed.stdout:
145
+ output_parts.append(completed.stdout.decode("utf-8", errors="replace"))
146
+ if completed.stderr:
147
+ output_parts.append(completed.stderr.decode("utf-8", errors="replace"))
148
+ self._last_debug_output = "\n".join(output_parts).strip()
149
+
150
+ return completed.returncode
151
+
152
+ # Check if we should stream output to stderr (volume=debug)
153
+ if self.volume == Volume.debug:
154
+ # Inherit stderr from parent process to stream output in real-time
155
+ kwargs["stderr"] = None # None means inherit
156
+ kwargs["stdout"] = subprocess.DEVNULL
157
+ else:
158
+ # Non-debug mode: discard all output
159
+ kwargs["stdout"] = subprocess.DEVNULL
160
+ kwargs["stderr"] = subprocess.DEVNULL
161
+
162
+ async with trio.open_nursery() as nursery:
163
+
164
+ def call_with_kwargs(task_status=trio.TASK_STATUS_IGNORED): # type: ignore
165
+ return trio.run_process(command, **kwargs, task_status=task_status)
166
+
167
+ start_time = time.time()
168
+ sp = await nursery.start(call_with_kwargs)
169
+
170
+ try:
171
+ with trio.move_on_after(
172
+ self.timeout * 10 if self.first_call else self.timeout
173
+ ):
174
+ await sp.wait()
175
+
176
+ runtime = time.time() - start_time
177
+
178
+ if sp.returncode is None:
179
+ # Process didn't terminate before timeout - kill it
180
+ await self._interrupt_wait_and_kill(sp)
181
+
182
+ if runtime >= self.timeout and self.first_call:
183
+ raise TimeoutExceededOnInitial(
184
+ timeout=self.timeout,
185
+ runtime=runtime,
186
+ )
187
+ finally:
188
+ if self.first_call:
189
+ self.initial_exit_code = sp.returncode
190
+ self.first_call = False
191
+
192
+ result: int | None = sp.returncode
193
+ assert result is not None
194
+
195
+ return result
196
+
197
+ async def run_for_exit_code(self, test_case: TestCase, debug: bool = False) -> int:
198
+ # Lazy import
199
+ if self._InputType is None:
200
+ from shrinkray.cli import InputType
201
+
202
+ self._InputType = InputType
203
+
204
+ if self.in_place:
205
+ if self.input_type == self._InputType.basename:
206
+ working = self.filename
207
+ await self.write_test_case_to_file(working, test_case)
208
+
209
+ return await self.run_script_on_file(
210
+ working=working,
211
+ debug=debug,
212
+ cwd=os.getcwd(),
213
+ )
214
+ else:
215
+ base, ext = os.path.splitext(self.filename)
216
+ working = base + "-" + os.urandom(16).hex() + ext
217
+ assert not os.path.exists(working)
218
+ try:
219
+ await self.write_test_case_to_file(working, test_case)
220
+
221
+ return await self.run_script_on_file(
222
+ working=working,
223
+ debug=debug,
224
+ cwd=os.getcwd(),
225
+ )
226
+ finally:
227
+ if os.path.exists(working):
228
+ if os.path.isdir(working):
229
+ shutil.rmtree(working)
230
+ else:
231
+ os.unlink(working)
232
+ else:
233
+ with TemporaryDirectory() as d:
234
+ working = os.path.join(d, self.base)
235
+ await self.write_test_case_to_file(working, test_case)
236
+
237
+ return await self.run_script_on_file(
238
+ working=working,
239
+ debug=debug,
240
+ cwd=d,
241
+ )
242
+
243
+ @abstractmethod
244
+ async def format_data(self, test_case: TestCase) -> TestCase | None: ...
245
+
246
+ @abstractmethod
247
+ async def run_formatter_command(
248
+ self, command: str | list[str], input: TestCase
249
+ ) -> subprocess.CompletedProcess: ...
250
+
251
+ @abstractmethod
252
+ async def print_exit_message(self, problem): ...
253
+
254
+ @property
255
+ def reducer(self):
256
+ try:
257
+ return self.__reducer
258
+ except AttributeError:
259
+ pass
260
+
261
+ work = WorkContext(
262
+ random=random.Random(self.seed),
263
+ volume=self.volume,
264
+ parallelism=self.parallelism,
265
+ )
266
+
267
+ problem: BasicReductionProblem[TestCase] = BasicReductionProblem(
268
+ is_interesting=self.is_interesting,
269
+ initial=self.initial,
270
+ work=work,
271
+ **self.extra_problem_kwargs,
272
+ )
273
+
274
+ # Writing the file back can't be guaranteed atomic, so we put a lock around
275
+ # writing successful reductions back to the original file so we don't
276
+ # write some confused combination of reductions.
277
+ write_lock = trio.Lock()
278
+
279
+ @problem.on_reduce
280
+ async def _(test_case: TestCase):
281
+ async with write_lock:
282
+ await self.write_test_case_to_file(self.filename, test_case)
283
+
284
+ self.__reducer = self.new_reducer(problem)
285
+ return self.__reducer
286
+
287
+ @property
288
+ def extra_problem_kwargs(self):
289
+ return {}
290
+
291
+ @property
292
+ def problem(self):
293
+ return self.reducer.target
294
+
295
+ async def is_interesting(self, test_case: TestCase) -> bool:
296
+ if self.first_call_time is None:
297
+ self.first_call_time = time.time()
298
+ async with self.is_interesting_limiter:
299
+ return await self.run_for_exit_code(test_case) == 0
300
+
301
+ @property
302
+ def parallel_tasks_running(self) -> int:
303
+ """Number of parallel tasks currently running."""
304
+ return self.is_interesting_limiter.borrowed_tokens
305
+
306
+ async def attempt_format(self, data: TestCase) -> TestCase:
307
+ if not self.can_format:
308
+ return data
309
+ attempt = await self.format_data(data)
310
+ if attempt is None:
311
+ self.can_format = False
312
+ return data
313
+ if attempt == data or await self.is_interesting(attempt):
314
+ return attempt
315
+ else:
316
+ self.can_format = False
317
+ return data
318
+
319
+ async def check_formatter(self):
320
+ if self.formatter_command is None:
321
+ return
322
+ formatter_result = await self.run_formatter_command(
323
+ self.formatter_command, self.initial
324
+ )
325
+
326
+ if formatter_result.returncode != 0:
327
+ print(
328
+ "Formatter exited unexpectedly on initial test case. If this is expected, please run with --formatter=none.",
329
+ file=sys.stderr,
330
+ )
331
+ print(
332
+ formatter_result.stderr.decode("utf-8").strip(),
333
+ file=sys.stderr,
334
+ )
335
+ sys.exit(1)
336
+ reformatted = formatter_result.stdout
337
+ if not await self.is_interesting(reformatted) and await self.is_interesting(
338
+ self.initial
339
+ ):
340
+ print(
341
+ "Formatting initial test case made it uninteresting. If this is expected, please run with --formatter=none.",
342
+ file=sys.stderr,
343
+ )
344
+ print(
345
+ formatter_result.stderr.decode("utf-8").strip(),
346
+ file=sys.stderr,
347
+ )
348
+ sys.exit(1)
349
+
350
+ async def build_error_message(self, e: Exception) -> str:
351
+ """Build a detailed error message for an invalid initial example.
352
+
353
+ This is used by the subprocess worker to provide helpful error messages
354
+ without printing directly to stderr or calling sys.exit.
355
+ """
356
+ lines = [
357
+ "Shrink ray cannot proceed because the initial call of the "
358
+ "interestingness test resulted in an uninteresting test case."
359
+ ]
360
+
361
+ if isinstance(e, TimeoutExceededOnInitial):
362
+ lines.append(
363
+ f"This is because your initial test case took {e.runtime:.2f}s "
364
+ f"exceeding your timeout setting of {self.timeout}."
365
+ )
366
+ lines.append(f"Try rerunning with --timeout={math.ceil(e.runtime * 2)}.")
367
+ else:
368
+ lines.append("Rerunning the interestingness test for debugging purposes...")
369
+ exit_code = await self.run_for_exit_code(self.initial, debug=True)
370
+ if exit_code != 0:
371
+ lines.append(
372
+ f"This exited with code {exit_code}, but the script should "
373
+ "return 0 for interesting test cases."
374
+ )
375
+ # Include the captured output from the debug run
376
+ if self._last_debug_output:
377
+ lines.append("\nOutput from the interestingness test:")
378
+ lines.append(self._last_debug_output)
379
+ local_exit_code = await self.run_script_on_file(
380
+ working=self.filename,
381
+ debug=False,
382
+ cwd=os.getcwd(),
383
+ )
384
+ if local_exit_code == 0:
385
+ lines.append(
386
+ "\nNote that Shrink Ray runs your script on a copy of the file "
387
+ "in a temporary directory. Here are the results of running it "
388
+ "in the current directory..."
389
+ )
390
+ other_exit_code = await self.run_script_on_file(
391
+ working=self.filename,
392
+ debug=True,
393
+ cwd=os.getcwd(),
394
+ )
395
+ # Include the output from running in current directory
396
+ if self._last_debug_output:
397
+ lines.append(self._last_debug_output)
398
+ if other_exit_code != local_exit_code:
399
+ lines.append(
400
+ f"This interestingness test is probably flaky as the first "
401
+ f"time we reran it locally it exited with {local_exit_code}, "
402
+ f"but the second time it exited with {other_exit_code}. "
403
+ "Please make sure your interestingness test is deterministic."
404
+ )
405
+ else:
406
+ lines.append(
407
+ "This suggests that your script depends on being run from "
408
+ "the current working directory. Please fix it to be "
409
+ "directory independent."
410
+ )
411
+ else:
412
+ assert self.initial_exit_code not in (None, 0)
413
+ lines.append(
414
+ f"This exited with code 0, but previously the script exited with "
415
+ f"{self.initial_exit_code}. This suggests your interestingness "
416
+ "test exhibits nondeterministic behaviour."
417
+ )
418
+
419
+ return "\n".join(lines)
420
+
421
+ async def report_error(self, e):
422
+ error_message = await self.build_error_message(e)
423
+ print(error_message, file=sys.stderr)
424
+ sys.exit(1)
425
+
426
+ def check_trivial_result(self, problem) -> str | None:
427
+ """Check if the result is trivially small and return error message if so.
428
+
429
+ Returns None if the result is acceptable, or an error message string
430
+ if the result is trivial and trivial_is_error is True.
431
+ """
432
+ if len(problem.current_test_case) <= 1 and self.trivial_is_error:
433
+ return (
434
+ f"Reduced to a trivial test case of size {len(problem.current_test_case)}\n"
435
+ "This probably wasn't what you intended. If so, please modify your "
436
+ "interestingness test to be more restrictive.\n"
437
+ "If you intended this behaviour, you can run with '--trivial-is-not-error' "
438
+ "to suppress this message."
439
+ )
440
+ return None
441
+
442
+
443
@define(slots=False)
class ShrinkRayStateSingleFile(ShrinkRayState[bytes]):
    """Session state for reducing a single file whose test case is raw bytes."""

    def new_reducer(self, problem: ReductionProblem[bytes]) -> Reducer[bytes]:
        """Create the byte-oriented reducer for ``problem``."""
        return ShrinkRay(problem, clang_delta=self.clang_delta_executable)

    def setup_formatter(self):
        """Select the formatting strategy according to ``self.formatter``.

        - ``"none"`` (any case): formatting is disabled and data passes through.
        - A formatter command could be determined: run it as a subprocess.
        - Otherwise: fall back to the built-in default reformatting.
        """
        from shrinkray.formatting import (
            default_reformat_data,
            determine_formatter_command,
        )

        if self.formatter.lower() == "none":

            async def format_data(test_case: bytes) -> bytes | None:
                await trio.lowlevel.checkpoint()
                return test_case

            self.can_format = False

        else:
            formatter_command = determine_formatter_command(
                self.formatter, self.filename
            )
            if formatter_command is not None:
                self.formatter_command = formatter_command

                async def format_data(test_case: bytes) -> bytes | None:
                    result = await self.run_formatter_command(
                        formatter_command, test_case
                    )
                    # A failing formatter is reported as "could not format".
                    if result.returncode != 0:
                        return None
                    return result.stdout

            else:

                async def format_data(test_case: bytes) -> bytes | None:
                    await trio.lowlevel.checkpoint()
                    return default_reformat_data(test_case)

        self.__format_data = format_data

    async def format_data(self, test_case: bytes) -> bytes | None:
        """Format ``test_case`` with the strategy chosen by ``setup_formatter``."""
        return await self.__format_data(test_case)

    async def run_formatter_command(
        self, command: str | list[str], input: bytes
    ) -> subprocess.CompletedProcess:
        """Run ``command`` with ``input`` on stdin, capturing stdout and stderr.

        A non-zero exit status does not raise; callers inspect ``returncode``.
        """
        return await trio.run_process(
            command,
            stdin=input,
            capture_stdout=True,
            capture_stderr=True,
            check=False,
        )

    async def write_test_case_to_file_impl(self, working: str, test_case: bytes):
        """Write the bytes of ``test_case`` to the file ``working``."""
        async with await trio.open_file(working, "wb") as o:
            await o.write(test_case)

    async def is_interesting(self, test_case: bytes) -> bool:
        """Return True if the interestingness test exits with code 0 on ``test_case``.

        Delegates to the base implementation so that the bookkeeping it does
        (recording ``first_call_time``) is not lost for single-file sessions.
        """
        return await super().is_interesting(test_case)

    async def print_exit_message(self, problem):
        """Reformat the final result if possible and print a summary of the run.

        Exits with status 1 when the result is trivially small and
        ``trivial_is_error`` is set.
        """
        formatting_increase = 0
        final_result = problem.current_test_case
        reformatted = await self.attempt_format(final_result)
        if reformatted != final_result:
            # attempt_format only returns a different value if is_interesting was True
            async with await trio.open_file(self.filename, "wb") as o:
                await o.write(reformatted)
            formatting_increase = max(0, len(reformatted) - len(final_result))
            final_result = reformatted

        # Share the wording of the trivial-result warning with
        # check_trivial_result rather than keeping a second copy of it here.
        trivial_message = self.check_trivial_result(problem)
        if trivial_message is not None:
            print(trivial_message)
            sys.exit(1)

        print("Reduction completed!")
        stats = problem.stats
        if self.initial == final_result:
            print("Test case was already maximally reduced.")
        elif len(final_result) < len(self.initial):
            print(
                f"Deleted {humanize.naturalsize(stats.initial_test_case_size - len(final_result))} "
                f"out of {humanize.naturalsize(stats.initial_test_case_size)} "
                f"({(1.0 - len(final_result) / stats.initial_test_case_size) * 100:.2f}% reduction) "
                f"in {humanize.precisedelta(timedelta(seconds=time.time() - stats.start_time))}"
            )
        elif len(final_result) == len(self.initial):
            print("Some changes were made but no bytes were deleted")
        else:
            print(
                f"Running reformatting resulted in an increase of {humanize.naturalsize(formatting_increase)}."
            )
548
+
549
+
550
class ShrinkRayDirectoryState(ShrinkRayState[dict[str, bytes]]):
    """Session state for reducing a directory.

    A test case is a mapping from a path (joined under the working directory
    when written out) to that file's contents.
    """

    def setup_formatter(self):
        """No formatter setup is performed for directories."""
        return None

    @property
    def extra_problem_kwargs(self):
        """Size and ordering functions suited to dictionary test cases."""

        def total_bytes(files: dict[str, bytes]) -> int:
            # Combined length of every file's contents.
            return sum(map(len, files.values()))

        def ordering(files: dict[str, bytes]) -> Any:
            # Fewer files first, then fewer bytes, then the sorted per-file keys.
            per_file = [(name, shortlex(content)) for name, content in files.items()]
            per_file.sort()
            return (len(files), total_bytes(files), per_file)

        return {"sort_key": ordering, "size": total_bytes}

    def new_reducer(
        self, problem: ReductionProblem[dict[str, bytes]]
    ) -> Reducer[dict[str, bytes]]:
        """Create the directory reducer for ``problem``."""
        clang_delta = self.clang_delta_executable
        return DirectoryShrinkRay(target=problem, clang_delta=clang_delta)

    async def write_test_case_to_file_impl(
        self, working: str, test_case: dict[str, bytes]
    ):
        """Replace the directory ``working`` with the files in ``test_case``."""
        # Start from an empty directory so files dropped by the reduction
        # do not linger from an earlier write.
        shutil.rmtree(working, ignore_errors=True)
        os.makedirs(working, exist_ok=True)
        for name, content in test_case.items():
            destination = os.path.join(working, name)
            parent = os.path.dirname(destination)
            os.makedirs(parent, exist_ok=True)
            async with await trio.open_file(destination, "wb") as handle:
                await handle.write(content)

    async def format_data(self, test_case: dict[str, bytes]) -> dict[str, bytes] | None:
        """Always return None: formatting is not supported for directory reduction."""
        return None

    async def run_formatter_command(
        self, command: str | list[str], input: dict[str, bytes]
    ) -> subprocess.CompletedProcess:
        """Always raise: formatting is not supported for directory reduction."""
        raise NotImplementedError("Directory formatting not supported")

    async def print_exit_message(self, problem):
        """Print a short completion notice."""
        print("All done!")
@@ -0,0 +1,24 @@
1
+ """Subprocess communication for separating reducer from UI."""
2
+
3
+ from shrinkray.subprocess.client import SubprocessClient
4
+ from shrinkray.subprocess.protocol import (
5
+ ProgressUpdate,
6
+ Request,
7
+ Response,
8
+ decode_bytes,
9
+ deserialize,
10
+ encode_bytes,
11
+ serialize,
12
+ )
13
+
14
+
15
+ __all__ = [
16
+ "Request",
17
+ "Response",
18
+ "ProgressUpdate",
19
+ "serialize",
20
+ "deserialize",
21
+ "encode_bytes",
22
+ "decode_bytes",
23
+ "SubprocessClient",
24
+ ]