dissect.util 3.24.dev2__cp314-cp314t-manylinux_2_28_aarch64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of dissect.util might be problematic. Click here for more details.
- dissect/util/__init__.py +20 -0
- dissect/util/_build.py +17 -0
- dissect/util/_native/__init__.pyi +3 -0
- dissect/util/_native/compression/__init__.pyi +3 -0
- dissect/util/_native/compression/lz4.pyi +7 -0
- dissect/util/_native/compression/lzo.pyi +3 -0
- dissect/util/_native/hash/__init__.py +3 -0
- dissect/util/_native/hash/crc32c.py +2 -0
- dissect/util/_native.cpython-314t-aarch64-linux-gnu.so +0 -0
- dissect/util/compression/__init__.py +45 -0
- dissect/util/compression/lz4.py +95 -0
- dissect/util/compression/lzbitmap.py +130 -0
- dissect/util/compression/lzfse.py +467 -0
- dissect/util/compression/lznt1.py +92 -0
- dissect/util/compression/lzo.py +118 -0
- dissect/util/compression/lzvn.py +241 -0
- dissect/util/compression/lzxpress.py +80 -0
- dissect/util/compression/lzxpress_huffman.py +184 -0
- dissect/util/compression/sevenbit.py +77 -0
- dissect/util/compression/xz.py +112 -0
- dissect/util/cpio.py +226 -0
- dissect/util/encoding/__init__.py +0 -0
- dissect/util/encoding/surrogateescape.py +21 -0
- dissect/util/exceptions.py +6 -0
- dissect/util/hash/__init__.py +28 -0
- dissect/util/hash/crc32.py +55 -0
- dissect/util/hash/crc32c.py +60 -0
- dissect/util/hash/jenkins.py +102 -0
- dissect/util/ldap.py +237 -0
- dissect/util/plist.py +156 -0
- dissect/util/sid.py +81 -0
- dissect/util/stream.py +772 -0
- dissect/util/tools/__init__.py +0 -0
- dissect/util/tools/dump_nskeyedarchiver.py +61 -0
- dissect/util/ts.py +295 -0
- dissect/util/xmemoryview.py +117 -0
- dissect_util-3.24.dev2.dist-info/METADATA +89 -0
- dissect_util-3.24.dev2.dist-info/RECORD +43 -0
- dissect_util-3.24.dev2.dist-info/WHEEL +5 -0
- dissect_util-3.24.dev2.dist-info/entry_points.txt +2 -0
- dissect_util-3.24.dev2.dist-info/licenses/COPYRIGHT +5 -0
- dissect_util-3.24.dev2.dist-info/licenses/LICENSE +201 -0
- dissect_util-3.24.dev2.dist-info/top_level.txt +1 -0
dissect/util/__init__.py
ADDED
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
from dissect.util.compression import (
|
|
2
|
+
lz4,
|
|
3
|
+
lznt1,
|
|
4
|
+
lzo,
|
|
5
|
+
lzxpress,
|
|
6
|
+
lzxpress_huffman,
|
|
7
|
+
sevenbit,
|
|
8
|
+
)
|
|
9
|
+
from dissect.util.hash import crc32c, jenkins
|
|
10
|
+
|
|
11
|
+
__all__ = [
|
|
12
|
+
"crc32c",
|
|
13
|
+
"jenkins",
|
|
14
|
+
"lz4",
|
|
15
|
+
"lznt1",
|
|
16
|
+
"lzo",
|
|
17
|
+
"lzxpress",
|
|
18
|
+
"lzxpress_huffman",
|
|
19
|
+
"sevenbit",
|
|
20
|
+
]
|
dissect/util/_build.py
ADDED
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
# Reference: https://setuptools.pypa.io/en/latest/build_meta.html#dynamic-build-dependencies-and-other-build-meta-tweaks
|
|
2
|
+
from __future__ import annotations
|
|
3
|
+
|
|
4
|
+
import os
|
|
5
|
+
from typing import Any
|
|
6
|
+
|
|
7
|
+
import setuptools.build_meta
|
|
8
|
+
from setuptools.build_meta import * # noqa: F403
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
def get_requires_for_build_wheel(config_settings: dict[str, Any] | None = None) -> list[str]:
    """Return the wheel build requirements, optionally including ``setuptools-rust``.

    The Rust toolchain requirement is appended when the ``BUILD_RUST`` environment variable
    is ``1`` or ``true`` (case-insensitive), or when ``--build-rust`` is present in ``config_settings``.

    Args:
        config_settings: Build frontend configuration settings, forwarded to setuptools as-is.

    Returns:
        The requirements reported by ``setuptools.build_meta``, plus ``setuptools-rust`` when requested.
    """
    requirements = setuptools.build_meta.get_requires_for_build_wheel(config_settings)

    # The environment variable takes precedence; config_settings is only consulted when it is unset/false.
    wants_rust = os.getenv("BUILD_RUST", "").lower() in {"1", "true"}
    if not wants_rust and config_settings:
        wants_rust = "--build-rust" in config_settings

    if not wants_rust:
        return requirements

    return [*requirements, "setuptools-rust"]
|
|
Binary file
|
|
@@ -0,0 +1,45 @@
|
|
|
1
|
+
from dissect.util.compression import lz4, lzo

lz4_python = lz4
lzo_python = lzo

# This selects between a native Rust version of lz4 and lzo (when available) and our own
# pure-Python implementation.
#
# By doing a:
#   from dissect.util.compression import lz4
# or
#   from dissect.util.compression import lzo
#
# in another project will automatically give you one or the other.
#
# The native Rust version is also available as dissect.util.compression.lz4_native
# and dissect.util.compression.lzo_native (when available) and the pure Python
# version is always available as dissect.util.compression.lz4_python and
# dissect.util.compression.lzo_python.
#
# Note that the pure Python implementation and the Rust implementation are NOT a full replacement
# for the "official" lz4 and lzo Python packages: only the decompress() function is implemented.
try:
    from dissect.util import _native

    # Resolve both native modules before rebinding anything. If either lookup fails we fall back
    # completely, so ``lz4``/``lzo`` can never point at a native module while ``lz4_native``/``lzo_native``
    # report ``None``.
    lz4_native = _native.compression.lz4
    lzo_native = _native.compression.lzo
except (ImportError, AttributeError):
    lz4_native = lzo_native = None
else:
    lz4 = lz4_native
    lzo = lzo_native

__all__ = [
    "lz4",
    "lz4_native",
    "lz4_python",
    "lzbitmap",
    "lzfse",
    "lznt1",
    "lzo",
    "lzo_native",
    "lzo_python",
    "lzvn",
    "lzxpress",
    "lzxpress_huffman",
    "sevenbit",
]
|
|
@@ -0,0 +1,95 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import io
|
|
4
|
+
import struct
|
|
5
|
+
from typing import BinaryIO
|
|
6
|
+
|
|
7
|
+
from dissect.util.exceptions import CorruptDataError
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
def _get_length(src: BinaryIO, length: int) -> int:
    """Read the extension bytes of an LZ4 variable-length field.

    Args:
        src: File-like object positioned directly after the bytes already consumed for this field.
        length: The 4-bit length nibble taken from the sequence token.

    Returns:
        ``length`` unchanged when it is not ``0xF``; otherwise ``0xF`` plus the value of every extension
        byte read, up to and including the first byte that is not ``0xFF``.

    Raises:
        CorruptDataError: If the input ends before the extension bytes are complete.
    """
    if length != 0xF:
        return length

    while True:
        read_buf = src.read(1)
        if len(read_buf) != 1:
            raise CorruptDataError("EOF at length read")

        len_part = read_buf[0]
        length += len_part

        # A byte below 0xFF terminates the length field
        if len_part != 0xFF:
            break

    return length


def decompress(
    src: bytes | BinaryIO,
    uncompressed_size: int = -1,
    return_bytearray: bool = False,
) -> bytes | bytearray:
    """LZ4 decompress from a file-like object or bytes up to a certain length. Assumes no header.

    Args:
        src: File-like object or bytes to decompress from.
        uncompressed_size: Expected size of the decompressed data. When positive, decompression stops as soon
            as this many bytes have been produced and an error is raised if a sequence would exceed it.
            When zero or negative, data is decompressed until the end of the input.
        return_bytearray: Whether to return ``bytearray`` or ``bytes``.

    Returns:
        The decompressed data.

    Raises:
        CorruptDataError: If the input is truncated, a match offset is zero or points before the start of
            the output, or the output would exceed a positive ``uncompressed_size``.
    """
    if not hasattr(src, "read"):
        src = io.BytesIO(src)

    # A non-positive size means "no limit"
    bounded = uncompressed_size > 0

    dst = bytearray()
    min_match_len = 4

    while True:
        if len(read_buf := src.read(1)) == 0:
            raise CorruptDataError("EOF at reading literal-len")

        # High nibble of the token is the literal length, low nibble the match length
        token = ord(read_buf)
        literal_len = _get_length(src, (token >> 4) & 0xF)

        if bounded and len(dst) + literal_len > uncompressed_size:
            raise CorruptDataError("Decompressed size exceeds uncompressed_size")

        if len(read_buf := src.read(literal_len)) != literal_len:
            raise CorruptDataError("Not literal data")

        dst.extend(read_buf)
        if bounded and len(dst) >= uncompressed_size:
            break

        if len(read_buf := src.read(2)) == 0:
            # The final sequence carries only literals, so its match-length nibble must be zero
            token_and = token & 0xF
            if token_and != 0:
                raise CorruptDataError(f"EOF, but match-len > 0: {token_and}")
            break

        if len(read_buf) != 2:
            raise CorruptDataError("Premature EOF")

        if (offset := struct.unpack("<H", read_buf)[0]) == 0:
            raise CorruptDataError("Offset can't be 0")

        if offset > len(dst):
            # Without this check the slice below would silently return fewer bytes than requested
            # and the output would be corrupt instead of the error being reported.
            raise CorruptDataError(f"Offset {offset} points before the start of the output")

        match_len = _get_length(src, token & 0xF)
        match_len += min_match_len

        if bounded and len(dst) + match_len > uncompressed_size:
            raise CorruptDataError("Decompressed size exceeds uncompressed_size")

        # An overlapping match (match_len > offset) repeats the last ``offset`` bytes periodically
        window = dst[-offset:]
        repeats, tail = divmod(match_len, offset)
        dst += window * repeats + window[:tail]

        if bounded and len(dst) >= uncompressed_size:
            break

    if not return_bytearray:
        dst = bytes(dst)

    return dst[:uncompressed_size] if bounded else dst
|
|
@@ -0,0 +1,130 @@
|
|
|
1
|
+
# References:
|
|
2
|
+
# - https://github.com/eafer/libzbitmap
|
|
3
|
+
# - https://github.com/sgan81/apfs-fuse
|
|
4
|
+
from __future__ import annotations
|
|
5
|
+
|
|
6
|
+
import io
|
|
7
|
+
import itertools
|
|
8
|
+
import struct
|
|
9
|
+
from typing import BinaryIO
|
|
10
|
+
|
|
11
|
+
_H = struct.Struct("<H")
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
def decompress(src: bytes | BinaryIO) -> bytes:
    """LZBITMAP decompress from a file-like object or bytes.

    Decompresses until EOF or EOS of the input data.

    Args:
        src: File-like object or bytes to decompress. Compressed chunks require a seekable object.

    Returns:
        The decompressed data.

    Raises:
        ValueError: If the magic is wrong, a token index or repeat count is invalid, or a match distance
            points before the start of the output.
    """
    if not hasattr(src, "read"):
        src = io.BytesIO(src)

    if src.read(4) != b"ZBM\x09":
        raise ValueError("Not a valid LZBITMAP stream")

    dst = bytearray()

    while True:
        # At EOF both reads return b"", which decodes to 0 and is handled as end of stream below
        compressed_size = int.from_bytes(src.read(3), "little")
        uncompressed_size = int.from_bytes(src.read(3), "little")

        if compressed_size == uncompressed_size + 6:  # chunk header size
            # Not compressed
            dst += src.read(uncompressed_size)

        elif uncompressed_size == 0:
            # End of stream
            break

        else:
            # Compressed
            distance_offset = int.from_bytes(src.read(3), "little")
            bitmap_offset = int.from_bytes(src.read(3), "little")
            token_offset = int.from_bytes(src.read(3), "little")
            literal_offset = 15

            # Buffer the whole chunk, including the 15 header bytes, so the offsets index into it directly
            src.seek(-15, io.SEEK_CUR)
            buf = memoryview(src.read(compressed_size))

            # Build the bitmap/token map
            # Entries 0-2 take their bitmap from the bitmap region; the other 12 are packed as 10-bit values
            # (8-bit bitmap + 2-bit distance selector) in the trailing 17 bytes of the chunk
            token_map = []
            bits = int.from_bytes(buf[-17:], "little")
            for i in range(0xF):
                if i < 3:
                    token_map.append((None, i))
                else:
                    token_map.append((bits & 0xFF, (bits >> 8) & 3))
                    bits >>= 10

            # Tokens are stored as nibbles, so we need to split each byte into two
            tokens = itertools.chain.from_iterable((b & 0xF, (b >> 4) & 0xF) for b in buf[token_offset:-17])

            # Initial match distance is 8, and is not reset between tokens
            distance = 8

            prev_token = None
            while uncompressed_size > 0:
                if (idx := next(tokens, None) if prev_token is None else prev_token) is None:
                    break

                if idx == 0xF:
                    # 0xF indicates a repeat count
                    raise ValueError("Invalid token index in LZBITMAP stream")

                # Look ahead one nibble to see whether this token is followed by a repeat marker
                lookahead = next(tokens, None)
                if lookahead is None:
                    # Nothing follows, so this is simply the last token (avoids leaking StopIteration)
                    prev_token = None
                    repeat = 1
                elif lookahead != 0xF:
                    # No repeat count, store the token for next iteration
                    prev_token = lookahead
                    repeat = 1
                else:
                    # Repeat count, read and sum
                    prev_token = None
                    repeat = 4
                    part = lookahead
                    while part == 0xF:
                        if (part := next(tokens, None)) is None:
                            raise ValueError("Invalid repeat count in LZBITMAP stream")
                        repeat += part

                for _ in range(repeat):
                    bitmap, token = token_map[idx]
                    if idx < 3:
                        # Index 0, 1, 2 are special and indicate we need to read a bitmap from the bitmap region
                        bitmap = buf[bitmap_offset]
                        bitmap_offset += 1

                    if token == 1:
                        # 1-byte distance
                        distance = buf[distance_offset]
                        distance_offset += 1
                    elif token == 2:
                        # 2-byte distance
                        (distance,) = _H.unpack_from(buf, distance_offset)
                        distance_offset += 2

                    for _ in range(8):
                        if bitmap & 1:
                            # Literal
                            dst.append(buf[literal_offset])
                            literal_offset += 1
                        else:
                            # Match
                            if distance > len(dst):
                                raise ValueError("Invalid match distance in LZBITMAP stream")
                            dst.append(dst[-distance])

                        bitmap >>= 1
                        uncompressed_size -= 1
                        if uncompressed_size == 0:
                            break

                    if uncompressed_size == 0:
                        # The chunk is complete; never emit more than its declared size, even if the
                        # repeat count says otherwise
                        break

    return bytes(dst)
|