shrinkray 0.0.0__py3-none-any.whl → 25.12.26__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
shrinkray/cli.py ADDED
@@ -0,0 +1,70 @@
+"""CLI utilities and types for shrink ray."""
+
+import os
+import shlex
+import sys
+from enum import Enum, IntEnum, auto
+from shutil import which
+from typing import Any
+
+import click
+
+
+def validate_command(ctx: Any, param: Any, value: str) -> list[str]:
+    """Validate and resolve a command string."""
+    parts = shlex.split(value)
+    command = parts[0]
+
+    if os.path.exists(command):
+        command = os.path.abspath(command)
+    else:
+        what = which(command)
+        if what is None:
+            raise click.BadParameter(f"{command}: command not found")
+        command = os.path.abspath(what)
+    return [command] + parts[1:]
+
+
+class EnumChoice[EnumType: Enum](click.Choice):
+    """A click Choice that works with Enums."""
+
+    def __init__(self, enum: type[EnumType]) -> None:
+        self.enum = enum
+        choices = [str(e.name) for e in enum]
+        self.__values = {e.name: e for e in enum}
+        super().__init__(choices)
+
+    def convert(self, value: str, param: Any, ctx: Any) -> EnumType:
+        return self.__values[value]
+
+
+class InputType(IntEnum):
+    """How input is passed to the test function."""
+
+    all = 0
+    stdin = 1
+    arg = 2
+    basename = 3
+
+    def enabled(self, value: "InputType") -> bool:
+        if self == InputType.all:
+            return True
+        return self == value
+
+
+class UIType(Enum):
+    """Type of UI to use."""
+
+    basic = auto()
+    textual = auto()
+
+
+def validate_ui(ctx, param, value) -> UIType:
+    """Validate and determine UI type."""
+    if value is None:
+        if sys.stdin.isatty() and sys.stdout.isatty():
+            return UIType.textual
+        else:
+            return UIType.basic
+    else:
+        return value
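To show how these pieces fit together, here is a minimal, illustrative sketch of wiring them into a click command. The option and argument names are assumptions made for the example, not shrink ray's actual CLI surface; only the imported names come from the module above.

# Illustrative sketch only: option/argument names are assumptions.
import click

from shrinkray.cli import EnumChoice, InputType, UIType, validate_command, validate_ui


@click.command()
@click.option("--input-type", type=EnumChoice(InputType), default="all")
@click.option("--ui", type=EnumChoice(UIType), callback=validate_ui, default=None)
@click.argument("test", callback=validate_command)
def main(input_type: InputType, ui: UIType, test: list[str]) -> None:
    # validate_command has already resolved the script to an absolute path plus its arguments.
    click.echo(f"running {test!r}, input={input_type.name}, ui={ui.name}")


if __name__ == "__main__":
    main()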
shrinkray/display.py ADDED
@@ -0,0 +1,75 @@
+"""Display utilities for shrink ray."""
+
+import shutil
+from collections.abc import Iterable
+
+from binaryornot.check import is_binary_string  # type: ignore[import-not-found]
+
+
+def get_terminal_size() -> tuple[int, int]:
+    """Get terminal size, with sensible fallbacks.
+
+    Returns:
+        (columns, lines) tuple. Defaults to (80, 24) if terminal size
+        cannot be determined.
+    """
+    size = shutil.get_terminal_size(fallback=(80, 24))
+    return (size.columns, size.lines)
+
+
+def to_lines(test_case: bytes) -> list[str]:
+    """Convert a test case to displayable lines."""
+    result = []
+    for line in test_case.split(b"\n"):
+        if is_binary_string(line):
+            result.append(line.hex())
+        else:
+            try:
+                result.append(line.decode("utf-8"))
+            except UnicodeDecodeError:
+                result.append(line.hex())
+    return result
+
+
+def to_blocks(test_case: bytes, block_size: int | None = None) -> list[str]:
+    """Convert a test case to hex blocks for display.
+
+    Args:
+        test_case: The bytes to convert
+        block_size: Number of bytes per block. If None, automatically
+            calculated from terminal width (each byte becomes 2 hex chars).
+    """
+    if block_size is None:
+        columns, _ = get_terminal_size()
+        # Each byte becomes 2 hex chars, leave some margin
+        block_size = max(1, (columns - 4) // 2)
+    return [
+        test_case[i : i + block_size].hex()
+        for i in range(0, len(test_case), block_size)
+    ]
+
+
+def format_diff(diff: Iterable[str], max_lines: int | None = None) -> str:
+    """Format a diff for display, truncating if too long.
+
+    Args:
+        diff: Iterable of diff lines
+        max_lines: Maximum number of lines to include. If None, uses
+            terminal height multiplied by a factor to allow scrolling
+            through substantial context.
+    """
+    if max_lines is None:
+        _, lines = get_terminal_size()
+        # Allow multiple screenfuls of context for scrolling
+        max_lines = max(lines * 20, 100)
+    results = []
+    start_writing = False
+    for line in diff:
+        if not start_writing and line.startswith("@@"):
+            start_writing = True
+        if start_writing:
+            results.append(line)
+            if len(results) > max_lines:
+                results.append("...")
+                break
+    return "\n".join(results)
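As a rough illustration of how these helpers combine, the sketch below renders a before/after pair either as a truncated unified diff or as hex blocks. The use of difflib is an assumption for the example, not a claim about how shrink ray's UI produces its diffs.

# Illustrative sketch only: difflib usage is an assumption for this example.
import difflib

from shrinkray.display import format_diff, to_blocks, to_lines

before = b'{"lots": "of json", "more": [1, 2, 3]}'
after = b"{}"

# Text-like data: unified diff of the decoded lines, truncated by format_diff.
diff = difflib.unified_diff(to_lines(before), to_lines(after), lineterm="")
print(format_diff(diff, max_lines=40))

# Binary-like data: fixed-width hex blocks instead.
for block in to_blocks(bytes(range(16)) * 4, block_size=16):
    print(block)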
@@ -0,0 +1,108 @@
+"""Formatting utilities for shrink ray."""
+
+import os
+import sys
+from shutil import which
+
+import chardet
+
+
+def find_python_command(name: str) -> str | None:
+    """Find a Python command, checking both PATH and the current Python's bin directory."""
+    first_attempt = which(name)
+    if first_attempt is not None:
+        return first_attempt
+    second_attempt = os.path.join(os.path.dirname(sys.executable), name)
+    if os.path.exists(second_attempt):
+        return second_attempt
+    return None
+
+
+def try_decode(data: bytes) -> tuple[str | None, str]:
+    """Try to decode bytes using detected encoding."""
+    for guess in chardet.detect_all(data):
+        try:
+            enc = guess["encoding"]
+            if enc is not None:
+                return enc, data.decode(enc)
+        except UnicodeDecodeError:
+            pass
+    return None, ""
+
+
+def default_formatter_command_for(filename: str) -> list[str] | str | None:
+    """Get the default formatter command for a file based on its extension."""
+    *_, ext = os.path.splitext(filename)
+
+    if ext in (".c", ".h", ".cpp", ".hpp", ".cc", ".cxx"):
+        return which("clang-format")
+
+    if ext == ".py":
+        black = find_python_command("black")
+        if black is not None:
+            return [black, "-"]
+
+    return None
+
+
+def default_reformat_data(data: bytes) -> bytes:
+    """Apply a simple language-agnostic reformatting to data."""
+    encoding, decoded = try_decode(data)
+    if encoding is None:
+        return data
+    result = []
+    indent = 0
+
+    def newline() -> None:
+        result.append("\n" + indent * " ")
+
+    start_of_newline = True
+    for i, c in enumerate(decoded):
+        if c == "\n":
+            start_of_newline = True
+            newline()
+            continue
+        elif c == " ":
+            if start_of_newline:
+                continue
+        else:
+            start_of_newline = False
+        if c == "{":
+            result.append(c)
+            indent += 4
+            if i + 1 == len(decoded) or decoded[i + 1] != "}":
+                newline()
+        elif c == "}":
+            if len(result) > 1 and result[-1].endswith(" "):
+                result[-1] = result[-1][:-4]
+            result.append(c)
+            indent -= 4
+            newline()
+        elif c == ";":
+            result.append(c)
+            newline()
+        else:
+            result.append(c)
+
+    output = "".join(result)
+    prev = None
+    while prev != output:
+        prev = output
+
+        output = output.replace(" \n", "\n")
+        output = output.replace("\n\n", "\n")
+
+    return output.encode(encoding)
+
+
+def determine_formatter_command(formatter: str, filename: str) -> list[str] | None:
+    """Determine the formatter command to use based on settings and filename."""
+    if formatter.lower() == "default":
+        formatter_command = default_formatter_command_for(filename)
+    elif formatter.lower() != "none":
+        formatter_command = formatter
+    else:
+        formatter_command = None
+    if isinstance(formatter_command, str):
+        formatter_command = [formatter_command]
+    return formatter_command
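A hedged sketch of the selection flow these functions support: resolve a formatter command for the file, run it if one is available, and otherwise fall back to the built-in reformatter. The import path shrinkray.formatting is an assumption (this hunk's file header is not shown), and the subprocess call is just one plausible way to invoke the resolved command.

# Illustrative sketch only. The module path is assumed; the functions are defined above.
import subprocess

from shrinkray.formatting import default_reformat_data, determine_formatter_command


def reformat(data: bytes, filename: str, formatter: str = "default") -> bytes:
    """Run the resolved formatter over data, falling back to the built-in reformatter."""
    command = determine_formatter_command(formatter, filename)
    if command is None:
        return default_reformat_data(data)
    result = subprocess.run(command, input=data, capture_output=True)
    # Keep the original bytes if the external formatter rejects the input.
    return result.stdout if result.returncode == 0 else data


print(reformat(b"int main(){return 0;}", "demo.c").decode(errors="replace"))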
shrinkray/passes/bytes.py CHANGED
@@ -1,5 +1,26 @@
+"""Byte-level reduction passes.
+
+This module provides reduction passes that operate on raw bytes.
+These are the foundation of Shrink Ray's reduction strategy, as
+all file formats ultimately reduce to bytes.
+
+Key passes:
+- hollow: Keeps only start/end of bracketed regions
+- lift_braces: Replaces {...} with its content
+- debracket: Removes matching bracket pairs
+- delete_byte_spans: Deletes contiguous byte ranges
+- short_deletions: Deletes small (1-10 byte) sequences
+- remove_indents/remove_whitespace: Whitespace normalization
+- lower_bytes: Reduces byte values toward 0
+- lexeme_based_deletions: Deletes between repeated patterns
+
+Formats:
+- Split(delimiter): Parses bytes into list of segments
+- Tokenize(): Parses bytes into tokens (identifiers, numbers, etc.)
+"""
+
 from collections import defaultdict, deque
-from typing import Sequence
+from collections.abc import Sequence
 
 from attrs import define
 
@@ -9,6 +30,8 @@ from shrinkray.passes.patching import Cuts, Patches, apply_patches
 
 @define(frozen=True)
 class Encoding(Format[bytes, str]):
+    """Format that decodes/encodes bytes using a character encoding."""
+
     encoding: str
 
     def __repr__(self) -> str:
@@ -27,6 +50,18 @@ class Encoding(Format[bytes, str]):
 
 @define(frozen=True)
 class Split(Format[bytes, list[bytes]]):
+    """Format that splits bytes by a delimiter.
+
+    This enables sequence-based passes to work on lines, statements, etc.
+
+    Example:
+        # Delete duplicate lines
+        compose(Split(b"\\n"), delete_duplicates)
+
+        # Delete blocks of 1-10 semicolon-separated statements
+        compose(Split(b";"), block_deletion(1, 10))
+    """
+
     splitter: bytes
 
    def __repr__(self) -> str:
@@ -44,6 +79,16 @@ class Split(Format[bytes, list[bytes]]):
 
 
 def find_ngram_endpoints(value: bytes) -> list[tuple[int, list[int]]]:
+    """Find repeated byte patterns and their positions.
+
+    This is used by lexeme_based_deletions to identify regions between
+    repeated patterns that might be deletable. For example, in code like:
+        print("hello"); print("world"); print("test")
+    The repeated "print" patterns suggest the semicolon-separated regions
+    might be independently deletable.
+
+    Returns a list of (ngram_length, [positions]) tuples.
+    """
     if len(set(value)) <= 1:
         return []
     queue: deque[tuple[int, Sequence[int]]] = deque([(0, range(len(value)))])
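A quick, illustrative way to see what this function returns for a small input (values are not asserted, since they depend on the implementation above):

# Illustrative only: inspect the repeated-pattern endpoints that
# lexeme_based_deletions turns into candidate deletions.
from shrinkray.passes.bytes import find_ngram_endpoints

data = b'print("a"); print("b"); print("c")'
for length, positions in find_ngram_endpoints(data):
    print(f"ngram length {length}: endpoints at {positions}")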
@@ -80,12 +125,24 @@ def find_ngram_endpoints(value: bytes) -> list[tuple[int, list[int]]]:
 
 
 def tokenize(text: bytes) -> list[bytes]:
+    """Split bytes into tokens: identifiers, numbers, and other characters.
+
+    This is a simple tokenizer that groups:
+    - Identifiers: [A-Za-z][A-Za-z0-9_]*
+    - Numbers: [0-9]+ (with optional decimal point)
+    - Spaces: runs of spaces
+    - Everything else: individual characters
+
+    Example:
+        tokenize(b"foo = 123") -> [b"foo", b" ", b"=", b" ", b"123"]
+    """
     result: list[bytes] = []
     i = 0
     while i < len(text):
         c = bytes([text[i]])
         j = i + 1
         if b"A" <= c <= b"z":
+            # Identifier: consume alphanumeric and underscore
             while j < len(text) and (
                 b"A"[0] <= text[j] <= b"z"[0]
                 or text[j] == b"_"[0]
@@ -93,11 +150,13 @@ def tokenize(text: bytes) -> list[bytes]:
             ):
                 j += 1
         elif b"0" <= c <= b"9":
+            # Number: consume digits and decimal point
             while j < len(text) and (
                 text[j] == b"."[0] or b"0"[0] <= text[j] <= b"9"[0]
             ):
                 j += 1
         elif c == b" ":
+            # Space run: consume consecutive spaces
             while j < len(text) and (text[j] == b" "[0]):
                 j += 1
         result.append(text[i:j])
@@ -112,11 +171,25 @@ MAX_DELETE_INTERVAL = 8
 async def lexeme_based_deletions(
     problem: ReductionProblem[bytes], min_size: int = 8
 ) -> None:
+    """Delete regions between repeated byte patterns.
+
+    This pass finds repeated patterns (like repeated keywords or punctuation)
+    and tries to delete the regions between them. For code like:
+
+        print("a"); print("b"); print("c")
+
+    The repeated "print(" pattern suggests each print statement might be
+    independently deletable. This pass identifies such regions and tries
+    to delete them.
+
+    Only regions >= min_size bytes are considered to avoid tiny deletions.
+    """
     intervals_by_k: dict[int, set[tuple[int, int]]] = defaultdict(set)
 
     for k, endpoints in find_ngram_endpoints(problem.current_test_case):
-        intervals_by_k[k].update(zip(endpoints, endpoints[1:]))
+        intervals_by_k[k].update(zip(endpoints, endpoints[1:], strict=False))
 
+    # Sort by ngram length (longer patterns first) then by interval size
     intervals_to_delete = [
         t
         for _, intervals in sorted(intervals_by_k.items(), reverse=True)
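An illustrative check of the grouping rules described above; the expected output is worked out by hand from those rules and is indicative rather than asserted:

# Illustrative only: tokens produced for a small input, per the grouping rules above.
from shrinkray.passes.bytes import tokenize

print(tokenize(b"foo_1 = bar(2.5);"))
# Expected: [b"foo_1", b" ", b"=", b" ", b"bar", b"(", b"2.5", b")", b";"]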
@@ -132,10 +205,21 @@ async def delete_intervals(
     intervals_to_delete: list[tuple[int, int]],
     shuffle: bool = False,
 ) -> None:
+    """Try to delete each of the given byte intervals.
+
+    Each interval (start, end) represents a contiguous region to try deleting.
+    The patch applier will find which intervals can be deleted independently
+    and combine compatible deletions.
+    """
     await apply_patches(problem, Cuts(), [[t] for t in intervals_to_delete])
 
 
 def brace_intervals(target: bytes, brace: bytes) -> list[tuple[int, int]]:
+    """Find all intervals enclosed by matching brace pairs.
+
+    Given a two-byte brace string like b"{}", returns intervals for content
+    between each matched open/close pair. Handles nesting correctly.
+    """
     open, close = brace
     intervals: list[tuple[int, int]] = []
     stack: list[int] = []
@@ -151,6 +235,16 @@ def brace_intervals(target: bytes, brace: bytes) -> list[tuple[int, int]]:
 
 
 async def debracket(problem: ReductionProblem[bytes]) -> None:
+    """Remove matching bracket pairs, keeping their content.
+
+    Example transformations:
+        "(x + y)" -> "x + y"
+        "[1, 2]" -> "1, 2"
+        "{foo}" -> "foo"
+
+    This is useful when brackets become unnecessary after other reductions,
+    e.g., if a function call was simplified to just its first argument.
+    """
     cuts = [
         [(u - 1, u), (v, v + 1)]
         for brackets in [b"{}", b"()", b"[]"]
@@ -164,6 +258,10 @@ async def debracket(problem: ReductionProblem[bytes]) -> None:
 
 
 def quote_intervals(target: bytes) -> list[tuple[int, int]]:
+    """Find all intervals enclosed by matching quote pairs.
+
+    Returns intervals between consecutive single or double quotes.
+    """
     indices: dict[int, list[int]] = defaultdict(list)
     for i, c in enumerate(target):
         indices[c].append(i)
@@ -178,6 +276,20 @@ def quote_intervals(target: bytes) -> list[tuple[int, int]]:
 
 
 async def hollow(problem: ReductionProblem[bytes]) -> None:
+    """Delete the contents of bracketed and quoted regions.
+
+    Example transformations:
+        '{"lots": "of json"}' -> '{}'
+        "[1, 2, 3, 4, 5]" -> "[]"
+        '"long string here"' -> '""'
+
+    This is one of the most effective early passes: it quickly removes
+    large chunks of content from structured data, keeping only the
+    "skeleton" of brackets and quotes.
+
+    Intervals are sorted by size (smallest first) to maximize the chance
+    of finding independent deletions that can be combined.
+    """
     target = problem.current_test_case
     intervals: list[tuple[int, int]] = []
     for b in [
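For a concrete feel of the candidate regions hollow works from, this illustrative snippet prints the brace and quote intervals for a small JSON-like input (values not asserted):

# Illustrative only: the interval sources hollow() combines and sorts by size.
from shrinkray.passes.bytes import brace_intervals, quote_intervals

data = b'{"key": [1, 2, 3], "s": "text"}'
print(brace_intervals(data, b"{}"))  # content spans inside {...}
print(brace_intervals(data, b"[]"))  # content spans inside [...]
print(quote_intervals(data))         # content spans between quotes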
@@ -194,6 +306,15 @@ async def hollow(problem: ReductionProblem[bytes]) -> None:
 
 
 async def short_deletions(problem: ReductionProblem[bytes]) -> None:
+    """Try deleting every small (1-10 byte) substring.
+
+    This is a brute-force pass that tries all possible small deletions.
+    It's expensive but effective for cleaning up small syntax elements
+    that other passes miss.
+
+    Example: to reduce "foo(x, y)" to "foo(x)" or "foo(y)", this pass
+    can find that deleting ", y" or "x, " works.
+    """
     target = problem.current_test_case
     await delete_intervals(
         problem,
@@ -206,6 +327,19 @@ async def short_deletions(problem: ReductionProblem[bytes]) -> None:
 
 
 async def lift_braces(problem: ReductionProblem[bytes]) -> None:
+    """Replace an outer brace region with one of its nested brace regions.
+
+    For nested braces like {A{B}C}, this tries to collapse the outer
+    region down to just the inner one: {A{B}C} -> {B}
+
+    Example transformations:
+        "if (x) { if (y) { z } }" -> "if (x) { z }"
+        "{ outer { inner } more }" -> "{ inner }"
+
+    This is useful for eliminating wrapper blocks while keeping the
+    essential nested structure. It complements debracket (which removes
+    brackets entirely) and hollow (which empties brackets).
+    """
     target = problem.current_test_case
 
     open_brace, close_brace = b"{}"
@@ -214,6 +348,7 @@ async def lift_braces(problem: ReductionProblem[bytes]) -> None:
 
     results: list[tuple[int, int, list[tuple[int, int]]]] = []
 
+    # Track brace nesting and record parent-child relationships
     for i, c in enumerate(target):
         if c == open_brace:
             start_stack.append(i)
@@ -227,6 +362,7 @@ async def lift_braces(problem: ReductionProblem[bytes]) -> None:
             if end > start:
                 results.append((start, end, children))
 
+    # For each parent-child pair, try deleting parent content around child
     cuts: list[list[tuple[int, int]]] = []
     for start, end, children in results:
         for child_start, child_end in children:
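To make the cut geometry concrete, here is a hand-worked sketch of the kind of two-part deletion lift_braces proposes for one parent/child brace pair. The exact cut boundaries used internally may differ; this approximates the idea, not the implementation:

# Illustrative only: one parent/child cut of the kind lift_braces proposes.
target = b"if (x) { if (y) { z } }"
outer_open = target.index(b"{")                     # 7
outer_close = target.rindex(b"}")                   # 22
child_open = target.index(b"{", outer_open + 1)     # 16
child_close = target.rindex(b"}", 0, outer_close)   # 20
# Delete from just inside the outer "{" through the child "{", and from the
# child "}" up to (but not including) the outer "}".
cuts = [(outer_open + 1, child_open + 1), (child_close, outer_close)]
reduced = bytearray(target)
for start, end in sorted(cuts, reverse=True):
    del reduced[start:end]
print(bytes(reduced))  # b"if (x) { z }"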
@@ -252,6 +388,12 @@ class Tokenize(Format[bytes, list[bytes]]):
 
 
 async def delete_byte_spans(problem: ReductionProblem[bytes]) -> None:
+    """Delete spans between occurrences of the same byte value.
+
+    For each byte value that appears multiple times, tries to delete
+    regions from the start to the first occurrence, between consecutive
+    occurrences, and from the last occurrence to the end.
+    """
     indices: dict[int, list[int]] = defaultdict(list)
     target = problem.current_test_case
     for i, c in enumerate(target):
@@ -262,13 +404,18 @@ async def delete_byte_spans(problem: ReductionProblem[bytes]) -> None:
     for c, ix in sorted(indices.items()):
         if len(ix) > 1:
             spans.append((0, ix[0] + 1))
-            spans.extend(zip(ix, ix[1:]))
+            spans.extend(zip(ix, ix[1:], strict=False))
             spans.append((ix[-1], len(target)))
 
     await apply_patches(problem, Cuts(), [[s] for s in spans])
 
 
 async def remove_indents(problem: ReductionProblem[bytes]) -> None:
+    """Remove leading spaces from lines.
+
+    Finds runs of spaces following newlines and tries to delete them.
+    Useful for normalizing indentation in code.
+    """
     target = problem.current_test_case
     spans: list[list[tuple[int, int]]] = []
 
@@ -288,6 +435,12 @@ async def remove_indents(problem: ReductionProblem[bytes]) -> None:
 
 
 async def remove_whitespace(problem: ReductionProblem[bytes]) -> None:
+    """Collapse runs of whitespace.
+
+    Finds consecutive whitespace characters and tries to remove all but
+    the first, or all but the first two. Complements remove_indents by
+    handling whitespace anywhere in the file.
+    """
     target = problem.current_test_case
     spans: list[list[tuple[int, int]]] = []
 
@@ -332,6 +485,11 @@ class NewlineReplacer(Patches[frozenset[int], bytes]):
 
 
 async def replace_space_with_newlines(problem: ReductionProblem[bytes]) -> None:
+    """Replace spaces and tabs with newlines.
+
+    Tries replacing each space or tab with a newline. This can help
+    normalize formatting and may enable other line-based reductions.
+    """
     await apply_patches(
         problem,
         NewlineReplacer(),
@@ -372,6 +530,12 @@ class ByteReplacement(Patches[ReplacementPatch, bytes]):
 
 
 async def lower_bytes(problem: ReductionProblem[bytes]) -> None:
+    """Globally replace byte values with smaller ones.
+
+    For each distinct byte value in the input, tries replacing all
+    occurrences with smaller values (0, 1, half, value-1, whitespace).
+    Also tries replacing pairs of bytes with the same smaller value.
+    """
     sources = sorted(set(problem.current_test_case))
 
     patches = [
@@ -417,6 +581,12 @@ class IndividualByteReplacement(Patches[ReplacementPatch, bytes]):
 
 
 async def lower_individual_bytes(problem: ReductionProblem[bytes]) -> None:
+    """Replace individual bytes at specific positions with smaller values.
+
+    Unlike lower_bytes (which replaces all occurrences of a byte value),
+    this tries reducing individual byte positions. Also handles carry-like
+    patterns where decrementing one byte allows the next to become 255.
+    """
     initial = problem.current_test_case
     patches = [
         {i: r}
@@ -434,18 +604,18 @@ async def lower_individual_bytes(problem: ReductionProblem[bytes]) -> None:
 RegionReplacementPatch = list[tuple[int, int, int]]
 
 
-class RegionReplacement(Patches[ReplacementPatch, bytes]):
+class RegionReplacement(Patches[RegionReplacementPatch, bytes]):
     @property
-    def empty(self) -> ReplacementPatch:
+    def empty(self) -> RegionReplacementPatch:
         return []
 
-    def combine(self, *patches: ReplacementPatch) -> ReplacementPatch:
-        result = []
+    def combine(self, *patches: RegionReplacementPatch) -> RegionReplacementPatch:
+        result: RegionReplacementPatch = []
         for p in patches:
             result.extend(p)
         return result
 
-    def apply(self, patch: ReplacementPatch, target: bytes) -> bytes:
+    def apply(self, patch: RegionReplacementPatch, target: bytes) -> bytes:
         result = bytearray(target)
         for i, j, d in patch:
             if d < result[i]:
@@ -453,11 +623,16 @@ class RegionReplacement(Patches[ReplacementPatch, bytes]):
                     result[k] = d
         return bytes(result)
 
-    def size(self, patch: ReplacementPatch) -> int:
+    def size(self, patch: RegionReplacementPatch) -> int:
         return 0
 
 
 async def short_replacements(problem: ReductionProblem[bytes]) -> None:
+    """Replace short regions with uniform byte values.
+
+    Tries replacing 1-4 byte regions with uniform values like 0, 1,
+    space, newline, or period. Useful for simplifying small sequences.
+    """
     target = problem.current_test_case
     patches = [
         [(i, j, c)]
@@ -505,7 +680,7 @@ async def sort_whitespace(problem: ReductionProblem[bytes]) -> None:
             i += 1
             continue
 
-        async def can_move_to_whitespace(k):
+        async def can_move_to_whitespace(k: int) -> bool:
             if i + k > len(problem.current_test_case):
                 return False
 
@@ -534,6 +709,11 @@ STANDARD_SUBSTITUTIONS = [(b"\0\0", b"\1"), (b"\0\0", b"\xff")]
 
 
 async def standard_substitutions(problem: ReductionProblem[bytes]):
+    """Apply standard byte sequence substitutions.
+
+    Tries some specific byte sequence replacements that are sometimes
+    helpful, primarily for handling edge cases in artificial test inputs.
+    """
     i = 0
     while i < len(problem.current_test_case):
         for k, v in STANDARD_SUBSTITUTIONS:
@@ -545,3 +725,30 @@ async def standard_substitutions(problem: ReductionProblem[bytes]):
                 break
         else:
            i += 1
+
+
+async def line_sorter(problem: ReductionProblem[bytes]):
+    """Sort lines into a more canonical order.
+
+    Uses insertion sort to reorder lines, swapping adjacent lines when
+    doing so maintains interestingness and produces a lexicographically
+    smaller result. This normalizes line order for reproducibility.
+    """
+    lines = problem.current_test_case.split(b"\n")
+    i = 1
+    while i < len(lines):
+        j = i
+        while j > 0:
+            u = lines[j - 1]
+            v = lines[j]
+            if v + u < u + v:
+                attempt = list(lines)
+                attempt[j - 1], attempt[j] = attempt[j], attempt[j - 1]
+                if not await problem.is_interesting(b"\n".join(attempt)):
+                    break
+                else:
+                    j -= 1
+                    lines = attempt
+            else:
+                break
+        i += 1
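The same insertion-sort-with-oracle pattern can be sketched synchronously, which makes the control flow easier to follow in isolation; the predicate below stands in for problem.is_interesting and is purely illustrative:

# Illustrative only: the insertion-sort-with-oracle pattern used by line_sorter,
# with a plain predicate standing in for ReductionProblem.is_interesting.
def sort_lines(data: bytes, is_interesting) -> bytes:
    lines = data.split(b"\n")
    i = 1
    while i < len(lines):
        j = i
        while j > 0:
            u, v = lines[j - 1], lines[j]
            if v + u < u + v:  # swapping yields a lexicographically smaller file
                attempt = list(lines)
                attempt[j - 1], attempt[j] = attempt[j], attempt[j - 1]
                if not is_interesting(b"\n".join(attempt)):
                    break
                lines = attempt
                j -= 1
            else:
                break
        i += 1
    return b"\n".join(lines)


# Toy oracle: any candidate that still contains b"keep" stays interesting.
print(sort_lines(b"zebra\nkeep\napple", lambda candidate: b"keep" in candidate))
# b"apple\nkeep\nzebra"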