dissect.util 3.24.dev2__cp314-cp314t-manylinux_2_28_aarch64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of dissect.util might be problematic. Click here for more details.

Files changed (43) hide show
  1. dissect/util/__init__.py +20 -0
  2. dissect/util/_build.py +17 -0
  3. dissect/util/_native/__init__.pyi +3 -0
  4. dissect/util/_native/compression/__init__.pyi +3 -0
  5. dissect/util/_native/compression/lz4.pyi +7 -0
  6. dissect/util/_native/compression/lzo.pyi +3 -0
  7. dissect/util/_native/hash/__init__.py +3 -0
  8. dissect/util/_native/hash/crc32c.py +2 -0
  9. dissect/util/_native.cpython-314t-aarch64-linux-gnu.so +0 -0
  10. dissect/util/compression/__init__.py +45 -0
  11. dissect/util/compression/lz4.py +95 -0
  12. dissect/util/compression/lzbitmap.py +130 -0
  13. dissect/util/compression/lzfse.py +467 -0
  14. dissect/util/compression/lznt1.py +92 -0
  15. dissect/util/compression/lzo.py +118 -0
  16. dissect/util/compression/lzvn.py +241 -0
  17. dissect/util/compression/lzxpress.py +80 -0
  18. dissect/util/compression/lzxpress_huffman.py +184 -0
  19. dissect/util/compression/sevenbit.py +77 -0
  20. dissect/util/compression/xz.py +112 -0
  21. dissect/util/cpio.py +226 -0
  22. dissect/util/encoding/__init__.py +0 -0
  23. dissect/util/encoding/surrogateescape.py +21 -0
  24. dissect/util/exceptions.py +6 -0
  25. dissect/util/hash/__init__.py +28 -0
  26. dissect/util/hash/crc32.py +55 -0
  27. dissect/util/hash/crc32c.py +60 -0
  28. dissect/util/hash/jenkins.py +102 -0
  29. dissect/util/ldap.py +237 -0
  30. dissect/util/plist.py +156 -0
  31. dissect/util/sid.py +81 -0
  32. dissect/util/stream.py +772 -0
  33. dissect/util/tools/__init__.py +0 -0
  34. dissect/util/tools/dump_nskeyedarchiver.py +61 -0
  35. dissect/util/ts.py +295 -0
  36. dissect/util/xmemoryview.py +117 -0
  37. dissect_util-3.24.dev2.dist-info/METADATA +89 -0
  38. dissect_util-3.24.dev2.dist-info/RECORD +43 -0
  39. dissect_util-3.24.dev2.dist-info/WHEEL +5 -0
  40. dissect_util-3.24.dev2.dist-info/entry_points.txt +2 -0
  41. dissect_util-3.24.dev2.dist-info/licenses/COPYRIGHT +5 -0
  42. dissect_util-3.24.dev2.dist-info/licenses/LICENSE +201 -0
  43. dissect_util-3.24.dev2.dist-info/top_level.txt +1 -0
dissect/util/stream.py ADDED
@@ -0,0 +1,772 @@
1
+ from __future__ import annotations
2
+
3
+ import io
4
+ import os
5
+ import sys
6
+ import zlib
7
+ from bisect import bisect_left, bisect_right
8
+ from threading import Lock
9
+ from typing import BinaryIO
10
+
11
# Default alignment (and buffer) size for streams; can be overridden with the
# DISSECT_STREAM_BUFFER_SIZE environment variable.
STREAM_BUFFER_SIZE = int(os.environ.get("DISSECT_STREAM_BUFFER_SIZE", io.DEFAULT_BUFFER_SIZE))
12
+
13
+
14
class AlignedStream(io.RawIOBase):
    """Basic buffered stream that provides aligned reads.

    Must be subclassed for various stream implementations. Subclasses can implement:
        - :meth:`~AlignedStream._read`
        - :meth:`~AlignedStream._seek`

    The offset and length for ``_read`` are guaranteed to be aligned for streams of a known size.
    If your stream has an unknown size (i.e. ``size == None``), reads of length ``-1`` (i.e. read until EOF) will be
    passed through to your implementation of ``_read``.
    The only time that overriding ``_seek`` would make sense is if there's no known size of your stream,
    but still want to provide ``SEEK_END`` functionality.

    Most subclasses of ``AlignedStream`` take one or more file-like objects as source.
    Operations on these subclasses, like reading, will modify the source file-like object as a side effect.

    Args:
        size: The size of the stream. This is used in read and seek operations. ``None`` if unknown.
        align: The alignment size. Read operations are aligned on this boundary. Also determines buffer size.

    .. automethod:: _read
    .. automethod:: _seek
    """

    def __init__(self, size: int | None = None, align: int = STREAM_BUFFER_SIZE):
        super().__init__()
        self.size = size
        self.align = align

        # Current position and the start of the aligned block that contains it
        self._pos = 0
        self._pos_align = 0

        # Cached contents of the aligned block at ``_pos_align`` (``None`` when not loaded)
        self._buf = None
        self._lock = Lock()

    def readable(self) -> bool:
        """Indicate that the stream is readable."""
        return True

    def seekable(self) -> bool:
        """Indicate that the stream is seekable."""
        return True

    def seek(self, pos: int, whence: int = io.SEEK_SET) -> int:
        """Seek the stream to the specified position.

        Returns:
            The new stream position after seeking.
        """
        with self._lock:
            pos = self._seek(pos, whence)
            self._set_pos(pos)

        return pos

    def _seek(self, pos: int, whence: int = io.SEEK_SET) -> int:
        """Calculate and return the new stream position after a seek.

        Raises:
            ValueError: If an absolute seek position is negative.
            IOError: If ``whence`` is invalid, or ``SEEK_END`` is used on a stream with no size.
        """
        if whence == io.SEEK_SET:
            if pos < 0:
                raise ValueError(f"negative seek position {pos}")
        elif whence == io.SEEK_CUR:
            pos = max(0, self._pos + pos)
        elif whence == io.SEEK_END:
            if self.size is None:
                raise IOError("invalid whence value for stream with no size")
            pos = max(0, self.size + pos)
        else:
            raise IOError("invalid whence value")

        return pos

    def _set_pos(self, pos: int) -> None:
        """Update the position and aligned position within the stream."""
        new_pos_align = pos - (pos % self.align)

        # Moving to a different aligned block invalidates the cached buffer
        if self._pos_align != new_pos_align:
            self._pos_align = new_pos_align
            self._buf = None

        self._pos = pos

    def tell(self) -> int:
        """Return current stream position."""
        return self._pos

    def _fill_buf(self) -> None:
        """Fill the alignment buffer if we can."""
        if self._buf or (self.size is not None and (self.size <= self._pos or self.size <= self._pos_align)):
            # Don't fill the buffer if:
            # - We already have a buffer
            # - The stream position is at the end (or beyond) the stream size
            return

        self._buf = self._read(self._pos_align, self.align)

    def read(self, n: int | None = -1) -> bytes:
        """Read and return up to ``n`` bytes, or read to the end of the stream if ``n`` is ``-1`` or ``None``.

        Returns an empty bytes object on EOF.

        Raises:
            ValueError: If ``n`` is smaller than ``-1``.
        """
        if n is None:
            # Follow the io convention that ``None`` means "read until EOF"
            n = -1
        elif n < -1:
            raise ValueError("invalid number of bytes to read")

        r = []
        size = self.size
        align = self.align

        with self._lock:
            if size is None and n == -1:
                start = self._pos

                # ``_read`` is only ever called with an aligned offset, so a misaligned
                # position needs the current block loaded to serve its tail from the buffer
                if start != self._pos_align:
                    self._fill_buf()

                if self._buf:
                    r.append(self._buf[start - self._pos_align :])
                    self._set_pos(self._pos_align + align)

                r.append(self._read(self._pos_align, -1))

                buf = b"".join(r)
                # Advance relative to where the read started so buffered bytes are not counted twice
                self._set_pos(start + len(buf))
                return buf

            # If we know the stream size, adjust n
            if size is not None:
                remaining = size - self._pos
                n = remaining if n == -1 else min(n, remaining)

            # Short path for when it turns out we don't need to read anything
            if n == 0 or (size is not None and size <= self._pos):
                return b""

            # Read misaligned start from buffer
            if self._pos != self._pos_align:
                self._fill_buf()

                buffer_pos = self._pos - self._pos_align
                remaining = align - buffer_pos
                buffer_len = min(n, remaining)

                r.append(self._buf[buffer_pos : buffer_pos + buffer_len])

                n -= buffer_len
                self._set_pos(self._pos + buffer_len)

            # Aligned blocks
            if n >= align:
                count, n = divmod(n, align)

                read_len = count * align
                r.append(self._read(self._pos, read_len))

                self._set_pos(self._pos + read_len)

            # Misaligned remaining bytes
            if n > 0:
                self._fill_buf()
                r.append(self._buf[:n])
                self._set_pos(self._pos + n)

            return b"".join(r)

    def readinto(self, b: bytearray) -> int:
        """Read bytes into a pre-allocated bytes-like object b.

        Returns an int representing the number of bytes read (0 for EOF).
        """
        buf = self.read(len(b))
        length = len(buf)
        b[:length] = buf
        return length

    def _read(self, offset: int, length: int) -> bytes:
        """Read method that backs this aligned stream."""
        raise NotImplementedError("_read needs to be implemented by subclass")

    def readall(self) -> bytes:
        """Read until end of stream."""
        return self.read()

    def readoffset(self, offset: int, length: int) -> bytes:
        """Convenience method to read from a given offset.

        Args:
            offset: The offset in the stream to read from.
            length: The number of bytes to read.
        """
        self.seek(offset)
        return self.read(length)

    def peek(self, n: int) -> bytes:
        """Convenience method to peek from the current offset without advancing the stream position.

        Args:
            n: The number of bytes to peek.
        """
        pos = self._pos
        data = self.read(n)
        # Restore the position that ``read`` advanced
        self._set_pos(pos)
        return data

    def close(self) -> None:
        """Close the stream. Does nothing by default."""
215
+
216
+
217
class RangeStream(AlignedStream):
    """Create a stream with a specific range from another file-like object.

    ASCII representation::

        Source file-like object
        |................................................|
                RangeStream with offset and size
                |............................|

    Args:
        fh: The source file-like object.
        offset: The offset the stream should start from on the source file-like object.
        size: The size the stream should be.
        align: The alignment size.
    """

    def __init__(self, fh: BinaryIO, offset: int, size: int | None, align: int = STREAM_BUFFER_SIZE):
        super().__init__(size, align)
        self._fh = fh
        self.offset = offset

    def _seek(self, pos: int, whence: int = io.SEEK_SET) -> int:
        """Calculate the new position, resolving ``SEEK_END`` on the source when the size is unknown."""
        if self.size is None and whence == io.SEEK_END:
            # Some file-like objects return None from seek(); fall back to tell()
            if (pos := self._fh.seek(pos, whence)) is None:
                pos = self._fh.tell()
            return max(0, pos - self.offset)
        return super()._seek(pos, whence)

    def _read(self, offset: int, length: int) -> bytes:
        """Read ``length`` bytes at ``offset`` within the range, never reading past a known size."""
        if self.size is not None:
            # Compare against None so a zero-sized range is not mistaken for an unbounded one,
            # and clamp so a negative length or remainder never turns into "read everything"
            remaining = max(0, self.size - offset)
            length = remaining if length < 0 else min(length, remaining)

        self._fh.seek(self.offset + offset)
        return self._fh.read(length)
250
+
251
+
252
class RelativeStream(RangeStream):
    """Stream that starts at a given offset of another file-like object.

    ASCII representation::

        Source file-like object
        |................................................|
                RelativeStream with offset
                |........................................|

    Args:
        fh: The source file-like object.
        offset: Position in the source file-like object where this stream begins.
        size: Optional size of the stream; ``None`` leaves it unknown.
        align: The alignment size.
    """

    def __init__(self, fh: BinaryIO, offset: int, size: int | None = None, align: int = STREAM_BUFFER_SIZE):
        super().__init__(fh=fh, offset=offset, size=size, align=align)
271
+
272
+
273
class BufferedStream(RelativeStream):
    """Buffered view on another file-like object, optionally starting at an offset.

    Args:
        fh: The source file-like object.
        offset: Position in the source where the stream begins (defaults to the start).
        size: The size the stream should be; ``None`` leaves it unknown.
        align: The alignment size.
    """

    def __init__(self, fh: BinaryIO, offset: int = 0, size: int | None = None, align: int = STREAM_BUFFER_SIZE):
        super().__init__(fh=fh, offset=offset, size=size, align=align)
287
+
288
+
289
class MappingStream(AlignedStream):
    """Create a stream from multiple mapped file-like objects.

    Each mapping ("run") is a ``(offset, size, fh, file_offset)`` tuple; runs are kept sorted by stream offset.

    Args:
        size: The size the stream should be. Note that every call to :meth:`add` overwrites it.
        align: The alignment size.
    """

    def __init__(self, size: int | None = None, align: int = STREAM_BUFFER_SIZE):
        super().__init__(size, align)
        self._runs: list[tuple[int, int, BinaryIO, int]] = []

    def add(self, offset: int, size: int, fh: BinaryIO, file_offset: int = 0) -> None:
        """Add a file-like object to the stream.

        Args:
            offset: The offset in the stream this fh maps to.
            size: The size that this mapped fh spans in the stream.
            fh: The file-like object to map.
            file_offset: The offset in the fh to start from.

        Note that there is no check on overlapping offsets and/or sizes.
        """
        self._runs.append((offset, size, fh, file_offset))
        self._runs = sorted(self._runs, key=lambda run: run[0])
        # Drop any cached block, the new run may change what it should contain
        self._buf = None
        # The stream size becomes the end of the run with the highest starting offset
        self.size = self._runs[-1][0] + self._runs[-1][1]

    def _get_run_idx(self, offset: int) -> int:
        """Find the index of the mapping run that contains a given offset.

        Args:
            offset: The offset to find a mapping for.

        Returns:
            The index into the sorted run list of the first run containing ``offset``.

        Raises:
            EOFError: If no mapping is found for the given offset.
        """
        for idx, run in enumerate(self._runs):
            if run[0] <= offset < run[0] + run[1]:
                return idx

        raise EOFError(f"No mapping for offset {offset}")

    def _read(self, offset: int, length: int) -> bytes:
        """Read up to ``length`` bytes at ``offset``, walking consecutive runs until a gap or the end."""
        result = []

        run_idx = self._get_run_idx(offset)
        runlist_len = len(self._runs)
        size = self.size

        while length > 0:
            if run_idx >= runlist_len:
                # We somehow requested more data than we have runs for
                break

            run_offset, run_size, run_fh, run_file_offset = self._runs[run_idx]

            if run_offset > offset:
                # We landed in a gap, stop reading
                break

            run_pos = offset - run_offset
            run_remaining = run_size - run_pos

            if run_remaining < 0:
                break

            # Never read past the stream size, the end of this run, or the requested length
            read_count = min(size - offset, min(run_remaining, length))

            run_fh.seek(run_file_offset + run_pos)
            result.append(run_fh.read(read_count))

            offset += read_count
            length -= read_count
            run_idx += 1

        return b"".join(result)
369
+
370
+
371
class RunlistStream(AlignedStream):
    """Stream backed by a list of block runs on another file-like object.

    Filesystems commonly describe file data as "runs": ``(block_offset, block_count)`` tuples that each
    denote a number of consecutive blocks starting at a given block. A ``block_offset`` of ``None`` is a
    sparse run, which reads as all ``\\x00`` bytes.

    Args:
        fh: The source file-like object.
        runlist: The runlist for this stream in block units.
        size: The size of the stream. This can be smaller than the total sum of blocks (to account for slack space).
        block_size: The block size in bytes.
        align: Optional alignment that differs from the block size, otherwise ``block_size`` is used as alignment.
    """

    def __init__(
        self, fh: BinaryIO, runlist: list[tuple[int, int]], size: int, block_size: int, align: int | None = None
    ):
        super().__init__(size, align or block_size)

        # When handed another RunlistStream, use the file-like object it wraps
        self._fh = fh._fh if isinstance(fh, RunlistStream) else fh

        self._runlist = []
        self._runlist_offsets = []

        self.runlist = runlist
        self.block_size = block_size

    @property
    def runlist(self) -> list[tuple[int, int]]:
        """The runlist currently backing this stream."""
        return self._runlist

    @runlist.setter
    def runlist(self, runlist: list[tuple[int, int]]) -> None:
        self._runlist = runlist

        # Record the starting block (within the stream) of every run that does not start at block 0,
        # so the run for a given block can be found with a bisect when reading
        starts = self._runlist_offsets = []
        blocks_seen = 0
        for _, count in self._runlist:
            if blocks_seen != 0:
                starts.append(blocks_seen)
            blocks_seen += count

        # Any cached block may no longer match the new runlist
        self._buf = None

    def _read(self, offset: int, length: int) -> bytes:
        """Read up to ``length`` bytes at ``offset`` by walking the runs that cover that range."""
        block_size = self.block_size
        starts = self._runlist_offsets
        stream_size = self.size

        chunks = []
        idx = bisect_right(starts, offset // block_size)
        run_total = len(self.runlist)

        # Stop once the request is satisfied or we run out of runs
        while length > 0 and idx < run_total:
            # The first run starts at block 0; later runs start at their recorded offset
            first_block = starts[idx - 1] if idx else 0
            disk_block, block_count = self.runlist[idx]

            pos_in_run = offset - first_block * block_size
            left_in_run = block_count * block_size - pos_in_run

            # The stream size can exceed what the runs cover; bail out when we are past this run
            if left_in_run < 0:
                break

            count = min(stream_size - offset, left_in_run, length)

            if disk_block is None:
                # Sparse run
                chunks.append(b"\x00" * count)
            else:
                self._fh.seek(disk_block * block_size + pos_in_run)
                chunks.append(self._fh.read(count))

            offset += count
            length -= count
            idx += 1

        return b"".join(chunks)
462
+
463
+
464
class OverlayStream(AlignedStream):
    """Create a stream from another file-like object with the ability to overlay other streams or bytes.

    Useful for patching large file-like objects without having to cache the entire contents.
    First wrap the original stream in this class, and then call ``add()`` with the offset and data to overlay.

    Args:
        fh: The source file-like object.
        size: The size the stream should be.
        align: The alignment size.
    """

    def __init__(self, fh: BinaryIO, size: int | None = None, align: int = STREAM_BUFFER_SIZE):
        super().__init__(size, align)
        self._fh = fh
        # Maps overlay start offset -> (overlay size, overlay file-like object)
        self.overlays: dict[int, tuple[int, BinaryIO]] = {}
        # Sorted overlay start offsets, used to bisect while reading
        self._lookup: list[int] = []

    def add(self, offset: int, data: bytes | BinaryIO, size: int | None = None) -> OverlayStream:
        """Add an overlay at the given offset.

        Args:
            offset: The offset in bytes to add an overlay at.
            data: The bytes or file-like object to overlay.
            size: Optional size specification of the overlay, if it can't be inferred.

        Returns:
            This stream, so calls can be chained. An empty overlay is ignored.

        Raises:
            ValueError: If the size is negative or the overlay overlaps with an existing one.
        """
        if not hasattr(data, "read"):
            size = size or len(data)
            data = io.BytesIO(data)
        elif size is None:
            size = data.size if hasattr(data, "size") else data.seek(0, io.SEEK_END)

        if not size:
            # Nothing to overlay; return self anyway so chained calls keep working
            return self

        if size < 0:
            raise ValueError("Size must be positive")

        # Check if there are overlapping overlays
        for other_offset, (other_size, _) in self.overlays.items():
            if other_offset < offset + size and offset < other_offset + other_size:
                raise ValueError(f"Overlap with existing overlay: ({other_offset, other_size})")

        self.overlays[offset] = (size, data)
        self._lookup.append(offset)
        self._lookup.sort()

        # Clear the buffer if we add an overlay at our current position
        if self._buf and (self._pos_align <= offset + size and offset <= self._pos_align + len(self._buf)):
            self._buf = None

        return self

    def _read(self, offset: int, length: int) -> bytes:
        """Read ``length`` bytes at ``offset``, taking data from overlays where present and the source elsewhere."""
        result = []

        fh = self._fh
        overlays = self.overlays
        lookup = self._lookup

        overlay_len = len(overlays)
        overlay_idx = bisect_left(lookup, offset)

        while length > 0:
            prev_overlay_offset = None if overlay_idx == 0 else lookup[overlay_idx - 1]
            next_overlay_offset = None if overlay_idx >= overlay_len else lookup[overlay_idx]

            if prev_overlay_offset is not None:
                prev_overlay_size, prev_overlay_data = overlays[prev_overlay_offset]
                prev_overlay_end = prev_overlay_offset + prev_overlay_size

                if prev_overlay_end > offset:
                    # Currently in an overlay
                    offset_in_prev_overlay = offset - prev_overlay_offset
                    prev_overlay_remaining = prev_overlay_size - offset_in_prev_overlay
                    prev_overlay_read_size = min(length, prev_overlay_remaining)

                    prev_overlay_data.seek(offset_in_prev_overlay)
                    result.append(prev_overlay_data.read(prev_overlay_read_size))

                    offset += prev_overlay_read_size
                    length -= prev_overlay_read_size

                    if length == 0:
                        break

            # Compare against None: an overlay at offset 0 is a valid next overlay
            if next_overlay_offset is not None:
                next_overlay_size, next_overlay_data = overlays[next_overlay_offset]
                gap_to_next_overlay = next_overlay_offset - offset

                if 0 <= gap_to_next_overlay < length:
                    if gap_to_next_overlay:
                        fh.seek(offset)
                        result.append(fh.read(gap_to_next_overlay))

                    # read remaining from overlay
                    next_overlay_read_size = min(next_overlay_size, length - gap_to_next_overlay)
                    next_overlay_data.seek(0)
                    result.append(next_overlay_data.read(next_overlay_read_size))

                    offset += next_overlay_read_size + gap_to_next_overlay
                    length -= next_overlay_read_size + gap_to_next_overlay
                else:
                    # Next overlay is too far away, complete read
                    fh.seek(offset)
                    result.append(fh.read(length))
                    break
            else:
                # No next overlay, complete read
                fh.seek(offset)
                result.append(fh.read(length))
                break

            overlay_idx += 1

        return b"".join(result)
580
+
581
+
582
class ZlibStream(AlignedStream):
    """Create a zlib stream from another file-like object.

    Basically the same as ``gzip.GzipFile`` but for raw zlib streams.
    Due to the nature of zlib streams, seeking backwards requires resetting the decompression context.

    Args:
        fh: The source file-like object.
        size: The size the stream should be.
        align: The alignment size.
        **kwargs: Passed on to ``zlib.decompressobj`` every time the decompression context is (re)created.
    """

    def __init__(self, fh: BinaryIO, size: int | None = None, align: int = STREAM_BUFFER_SIZE, **kwargs):
        self._fh = fh

        self._zlib = None
        self._zlib_args = kwargs
        # Number of decompressed bytes produced so far by the current decompression context
        self._zlib_offset = 0
        # Compressed bytes zlib handed back (unconsumed tail) that must be fed in before reading more from fh
        self._zlib_prepend = b""
        self._zlib_prepend_offset = None
        self._rewind()

        super().__init__(size, align)

    def _rewind(self) -> None:
        """Reset the source and the decompression context to the start of the stream."""
        self._fh.seek(0)
        self._zlib = zlib.decompressobj(**self._zlib_args)
        self._zlib_offset = 0
        self._zlib_prepend = b""
        self._zlib_prepend_offset = None

    def _seek_zlib(self, offset: int) -> None:
        """Move the decompression context to ``offset`` in the decompressed data."""
        if offset < self._zlib_offset:
            # Can't go backwards in a zlib stream, start over
            self._rewind()

        # Decompress and discard until we reach the requested offset (or run out of data)
        while self._zlib_offset < offset:
            read_size = min(offset - self._zlib_offset, self.align)
            if self._read_zlib(read_size) == b"":
                break

    def _read_fh(self, length: int) -> bytes:
        """Read ``length`` compressed bytes, serving previously unconsumed bytes first."""
        if self._zlib_prepend_offset is None:
            return self._fh.read(length)

        if self._zlib_prepend_offset + length <= len(self._zlib_prepend):
            offset = self._zlib_prepend_offset
            self._zlib_prepend_offset += length
            return self._zlib_prepend[offset : self._zlib_prepend_offset]

        # The prepend buffer is exhausted by this read, top it up from the source
        offset = self._zlib_prepend_offset
        self._zlib_prepend_offset = None
        return self._zlib_prepend[offset:] + self._fh.read(length - len(self._zlib_prepend) + offset)

    def _read_zlib(self, length: int) -> bytes:
        """Decompress and return up to ``length`` bytes; a negative ``length`` reads until the end."""
        if length < 0:
            return self.readall()

        result = []
        while length > 0:
            buf = self._read_fh(io.DEFAULT_BUFFER_SIZE)
            decompressed = self._zlib.decompress(buf, length)

            if self._zlib.unconsumed_tail != b"":
                self._zlib_prepend = self._zlib.unconsumed_tail
                self._zlib_prepend_offset = 0

            if decompressed:
                # Keep output even when the input is exhausted: decompress() can still return data
                # that was pending from an earlier, length-limited call
                result.append(decompressed)
                length -= len(decompressed)
            elif buf == b"":
                # No more input and nothing left to flush
                break

        buf = b"".join(result)
        self._zlib_offset += len(buf)
        return buf

    def _read(self, offset: int, length: int) -> bytes:
        self._seek_zlib(offset)
        return self._read_zlib(length)

    def readall(self) -> bytes:
        """Decompress and return everything from the current stream position until the end."""
        self._seek_zlib(self.tell())

        chunks = []
        # sys.maxsize means the max length of output buffer is unlimited,
        # so that the whole input buffer can be decompressed within one
        # .decompress() call.
        while data := self._read_zlib(sys.maxsize):
            chunks.append(data)

        return b"".join(chunks)
672
+
673
+
674
class BitStream:
    """Reader that hands out bits (most significant bit first) from a file-like object.

    Args:
        fh: File-like object to read bits from.
    """

    def __init__(self, fh: BinaryIO):
        self.fh = fh
        # Byte position in ``fh`` of the next byte to pull into the bit buffer
        self._byte_offset = fh.tell()

        # ``buffer`` holds the ``bits`` bits that were read from ``fh`` but not consumed yet
        self.buffer = 0
        self.bits = 0

    def readable(self) -> bool:
        """Indicate that the stream is readable."""
        return True

    def seekable(self) -> bool:
        """Indicate that the stream is seekable."""
        return True

    def writable(self) -> bool:
        """Indicate that the stream is not writable."""
        return False

    def seek(self, pos: int, whence: int = io.SEEK_SET) -> int:
        """Seek the stream to the specified position in bits.

        Returns:
            The new stream position after seeking.
        """
        if whence == io.SEEK_SET:
            target = pos
        elif whence == io.SEEK_CUR:
            target = self.tell() + pos
        elif whence == io.SEEK_END:
            self.fh.seek(0, io.SEEK_END)
            target = self.fh.tell() * 8 + pos
        else:
            raise IOError("invalid whence value")

        # Start from a whole byte with an empty bit buffer...
        self._byte_offset, leftover_bits = divmod(target, 8)
        self.bits = 0
        self.buffer = 0

        # ...then consume the bits that precede the target within that byte
        if leftover_bits > 0:
            self.read(leftover_bits)

        return self.tell()

    def tell(self) -> int:
        """Get the current position in the stream in bits."""
        return (self._byte_offset * 8) - self.bits

    def read(self, n: int) -> int:
        """Read n bits from the stream.

        Args:
            n: Number of bits to read.
        """
        value = self.peek(n)
        self.remove(n)
        return value

    def peek(self, n: int) -> int:
        """Peek n bits from the stream without advancing.

        Args:
            n: Number of bits to peek.
        """
        if n == 0:
            return 0

        # Top up the bit buffer until it holds enough bits or the source runs dry
        while self.bits < n:
            wanted = (n - self.bits + 7) // 8
            self.fh.seek(self._byte_offset)
            chunk = self.fh.read(wanted)
            if not chunk:
                break

            chunk_bits = len(chunk) * 8
            self.buffer = (self.buffer << chunk_bits) | int.from_bytes(chunk, "big")
            self.bits += chunk_bits
            self._byte_offset += len(chunk)

        # Return the oldest bits; fewer than n when the source ran out
        available = min(n, self.bits)
        return self.buffer >> (self.bits - available)

    def remove(self, n: int) -> None:
        """Remove n bits from the stream.

        Args:
            n: Number of bits to remove.
        """
        consumed = min(n, self.bits)
        self.bits -= consumed
        # Mask off the consumed (highest) bits
        self.buffer &= (1 << self.bits) - 1