dissect.util 3.24.dev1__cp314-cp314t-manylinux_2_28_s390x.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- dissect/util/__init__.py +20 -0
- dissect/util/_build.py +17 -0
- dissect/util/_native/__init__.pyi +3 -0
- dissect/util/_native/compression/__init__.pyi +3 -0
- dissect/util/_native/compression/lz4.pyi +7 -0
- dissect/util/_native/compression/lzo.pyi +3 -0
- dissect/util/_native/hash/__init__.py +3 -0
- dissect/util/_native/hash/crc32c.py +2 -0
- dissect/util/_native.cpython-314t-s390x-linux-gnu.so +0 -0
- dissect/util/compression/__init__.py +45 -0
- dissect/util/compression/lz4.py +95 -0
- dissect/util/compression/lzbitmap.py +130 -0
- dissect/util/compression/lzfse.py +467 -0
- dissect/util/compression/lznt1.py +92 -0
- dissect/util/compression/lzo.py +118 -0
- dissect/util/compression/lzvn.py +241 -0
- dissect/util/compression/lzxpress.py +80 -0
- dissect/util/compression/lzxpress_huffman.py +184 -0
- dissect/util/compression/sevenbit.py +77 -0
- dissect/util/compression/xz.py +112 -0
- dissect/util/cpio.py +226 -0
- dissect/util/encoding/__init__.py +0 -0
- dissect/util/encoding/surrogateescape.py +21 -0
- dissect/util/exceptions.py +6 -0
- dissect/util/hash/__init__.py +28 -0
- dissect/util/hash/crc32.py +55 -0
- dissect/util/hash/crc32c.py +60 -0
- dissect/util/hash/jenkins.py +102 -0
- dissect/util/ldap.py +237 -0
- dissect/util/plist.py +156 -0
- dissect/util/sid.py +81 -0
- dissect/util/stream.py +671 -0
- dissect/util/tools/__init__.py +0 -0
- dissect/util/tools/dump_nskeyedarchiver.py +61 -0
- dissect/util/ts.py +295 -0
- dissect/util/xmemoryview.py +117 -0
- dissect_util-3.24.dev1.dist-info/METADATA +89 -0
- dissect_util-3.24.dev1.dist-info/RECORD +43 -0
- dissect_util-3.24.dev1.dist-info/WHEEL +5 -0
- dissect_util-3.24.dev1.dist-info/entry_points.txt +2 -0
- dissect_util-3.24.dev1.dist-info/licenses/COPYRIGHT +5 -0
- dissect_util-3.24.dev1.dist-info/licenses/LICENSE +201 -0
- dissect_util-3.24.dev1.dist-info/top_level.txt +1 -0
dissect/util/cpio.py
ADDED
|
@@ -0,0 +1,226 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import stat
|
|
4
|
+
import struct
|
|
5
|
+
import tarfile
|
|
6
|
+
from tarfile import InvalidHeaderError
|
|
7
|
+
from typing import BinaryIO
|
|
8
|
+
|
|
9
|
+
# Identifiers for the cpio header variants handled by this module. They are
# compared against (and stored in) the ``format`` attribute of a ``TarFile``.
FORMAT_CPIO_BIN = 10
FORMAT_CPIO_ODC = 11
FORMAT_CPIO_NEWC = 12
FORMAT_CPIO_CRC = 13
FORMAT_CPIO_HPBIN = 16
FORMAT_CPIO_HPODC = 17
# Used when the variant has not been determined (yet).
FORMAT_CPIO_UNKNOWN = 18

# Header magic values, written as octal literals.
CPIO_MAGIC_OLD = 0o070707
CPIO_MAGIC_NEW = 0o070701
CPIO_MAGIC_CRC = 0o070702

# Translates the file-type bits of a ``stat`` mode into the matching ``tarfile`` type flag.
TYPE_MAP = {
    stat.S_IFREG: tarfile.REGTYPE,
    stat.S_IFDIR: tarfile.DIRTYPE,
    stat.S_IFIFO: tarfile.FIFOTYPE,
    stat.S_IFLNK: tarfile.SYMTYPE,
    stat.S_IFCHR: tarfile.CHRTYPE,
    stat.S_IFBLK: tarfile.BLKTYPE,
}
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
class CpioInfo(tarfile.TarInfo):
    """Custom ``TarInfo`` implementation for reading cpio archives.

    Besides the regular ``TarInfo`` attributes, parsed members carry the extra
    cpio header fields ``magic``, ``ino``, ``nlink``, ``rdevmajor``,
    ``rdevminor`` and ``namesize``.

    Examples::

        tarfile.open(..., tarinfo=CpioInfo)
        # or
        tarfile.TarFile(..., tarinfo=CpioInfo)

    """

    @classmethod
    def fromtarfile(cls, tarfile: tarfile.TarFile) -> CpioInfo | None:
        """Read and parse the next member header from ``tarfile.fileobj``.

        If ``tarfile.format`` is not one of the cpio formats, the format is detected
        from the stream and stored back on ``tarfile``.

        Args:
            tarfile: The ``TarFile`` object that is being read.

        Returns:
            The parsed member, or ``None`` when the trailer entry is reached.

        Raises:
            InvalidHeaderError: If the cpio format is unknown or the header magic is invalid.
        """
        if tarfile.format not in (
            FORMAT_CPIO_BIN,
            FORMAT_CPIO_ODC,
            FORMAT_CPIO_NEWC,
            FORMAT_CPIO_CRC,
            FORMAT_CPIO_HPBIN,
            FORMAT_CPIO_HPODC,
        ):
            tarfile.format = detect_header(tarfile.fileobj)

        # Every header variant has its own fixed size
        if tarfile.format in (FORMAT_CPIO_BIN, FORMAT_CPIO_HPBIN):
            buf = tarfile.fileobj.read(26)
        elif tarfile.format in (FORMAT_CPIO_ODC, FORMAT_CPIO_HPODC):
            buf = tarfile.fileobj.read(76)
        elif tarfile.format in (FORMAT_CPIO_NEWC, FORMAT_CPIO_CRC):
            buf = tarfile.fileobj.read(110)
        else:
            raise InvalidHeaderError("Unknown cpio type")

        obj = cls.frombuf(buf, tarfile.format, tarfile.encoding, tarfile.errors)
        obj.format = tarfile.format
        # The header starts where the read above began
        obj.offset = tarfile.fileobj.tell() - len(buf)
        return obj._proc_member(tarfile)

    @classmethod
    def frombuf(cls, buf: bytes, format: int, encoding: str, errors: str) -> CpioInfo:
        """Construct a ``CpioInfo`` from a raw header buffer.

        Args:
            buf: The raw header bytes.
            format: One of the ``FORMAT_CPIO_*`` identifiers.
            encoding: Unused here; kept for signature compatibility with ``TarInfo.frombuf``.
            errors: Unused here; kept for signature compatibility with ``TarInfo.frombuf``.

        Raises:
            InvalidHeaderError: If ``format`` is not a known cpio format or the header magic is invalid.
        """
        if format in (FORMAT_CPIO_BIN, FORMAT_CPIO_ODC, FORMAT_CPIO_HPBIN, FORMAT_CPIO_HPODC):
            obj = cls._old_frombuf(buf, format)
        elif format in (FORMAT_CPIO_NEWC, FORMAT_CPIO_CRC):
            obj = cls._new_frombuf(buf, format)
        else:
            # Without this branch ``obj`` would be unbound below
            raise InvalidHeaderError("Unknown cpio type")

        # Common postprocessing
        ftype = stat.S_IFMT(obj._mode)
        # File types without a tarfile equivalent keep their raw stat type bits
        obj.type = TYPE_MAP.get(ftype, ftype)
        obj.mode = stat.S_IMODE(obj._mode)

        return obj

    @classmethod
    def _old_frombuf(cls, buf: bytes, format: int) -> CpioInfo:
        """Parse an old binary or old ASCII (odc) cpio header.

        Raises:
            InvalidHeaderError: If the header magic is not ``CPIO_MAGIC_OLD``.
        """
        if format in (FORMAT_CPIO_BIN, FORMAT_CPIO_HPBIN):
            values = list(struct.unpack("<13H", buf))
            if values[0] == _swap16(CPIO_MAGIC_OLD):
                # The archive was written with the opposite byte order
                values = [_swap16(v) for v in values]

            # mtime and size are each stored as two 16-bit words, most significant word first.
            # Merge them so the resulting list has the same layout as the ASCII variant below.
            mtime = (values.pop(8) << 16) | values.pop(8)
            size = (values.pop(9) << 16) | values.pop(9)
            values.insert(8, mtime)
            values.append(size)
        else:
            # All fields are octal text: 6 characters each, except mtime and size which are 11
            values = [int(v, 8) for v in struct.unpack("<6s6s6s6s6s6s6s6s11s6s11s", buf)]

        if values[0] != CPIO_MAGIC_OLD:
            raise InvalidHeaderError(f"Invalid (old) ASCII/binary cpio header magic: {oct(values[0])}")

        obj = cls()
        obj.devmajor = values[1] >> 8
        obj.devminor = values[1] & 0xFF
        obj._mode = values[3]
        obj.uid = values[4]
        obj.gid = values[5]
        obj.mtime = values[8]
        obj.size = values[10]

        # Extra fields
        obj.magic = values[0]
        obj.ino = values[2]
        obj.nlink = values[6]
        obj.rdevmajor = values[7] >> 8
        obj.rdevminor = values[7] & 0xFF
        obj.namesize = values[9]

        # This is a specific case for HP/UX cpio archives, which I'll let this comment from the original source explain:
        #   HP/UX cpio creates archives that look just like ordinary archives,
        #   but for devices it sets major = 0, minor = 1, and puts the
        #   actual major/minor number in the filesize field.  See if this
        #   is an HP/UX cpio archive, and if so fix it.  We have to do this
        #   here because process_copy_in() assumes filesize is always 0
        #   for devices.
        #
        # The raw header mode (``_mode``) must be used here: ``mode`` still holds the ``TarInfo``
        # default at this point and carries no file type bits, so checking it would never match.
        if (
            stat.S_IFMT(obj._mode) in (stat.S_IFCHR, stat.S_IFBLK, stat.S_IFSOCK, stat.S_IFIFO)
            and obj.size != 0
            and obj.rdevmajor == 0
            and obj.rdevminor == 1
        ):
            obj.rdevmajor = (obj.size >> 8) & 0xFF
            obj.rdevminor = obj.size & 0xFF
            obj.size = 0

        return obj

    @classmethod
    def _new_frombuf(cls, buf: bytes, format: int) -> CpioInfo:
        """Parse a new ASCII (newc) or CRC cpio header.

        Raises:
            InvalidHeaderError: If the header magic is neither ``CPIO_MAGIC_NEW`` nor ``CPIO_MAGIC_CRC``.
        """
        values = struct.unpack("<6s8s8s8s8s8s8s8s8s8s8s8s8s8s", buf)
        # The magic is parsed as octal text, all remaining fields as hexadecimal text
        values = [int(values[0], 8)] + [int(v, 16) for v in values[1:]]
        if values[0] not in (CPIO_MAGIC_NEW, CPIO_MAGIC_CRC):
            raise InvalidHeaderError(f"Invalid (new) ASCII cpio header magic: {oct(values[0])}")

        obj = cls()
        obj._mode = values[2]
        obj.uid = values[3]
        obj.gid = values[4]
        obj.mtime = values[6]
        obj.size = values[7]
        obj.devmajor = values[8]
        obj.devminor = values[9]
        obj.chksum = values[13]

        # Extra fields
        obj.magic = values[0]
        obj.ino = values[1]
        obj.nlink = values[5]
        obj.rdevmajor = values[10]
        obj.rdevminor = values[11]
        obj.namesize = values[12]

        return obj

    def _proc_member(self, tarfile: tarfile.TarFile) -> CpioInfo | None:
        """Read the member name and determine where its data and the next header start.

        Returns:
            ``self``, or ``None`` if this entry is the archive trailer.
        """
        # ``namesize`` counts one trailing byte (presumably the NUL terminator) that is not part of the name
        self.name = tarfile.fileobj.read(self.namesize - 1).decode(tarfile.encoding, tarfile.errors)
        if self.name == "TRAILER!!!":
            # The last entry in a cpio file has the special name ``TRAILER!!!``, indicating the end of the archive
            return None

        # Skip the one byte following the name that was not read above
        offset = tarfile.fileobj.tell() + 1
        self.offset_data = self._round_word(offset)
        tarfile.offset = self._round_word(self.offset_data + self.size)

        if self.issym():
            # The link target of a symlink is stored as the member data
            tarfile.fileobj.seek(self.offset_data)
            self.linkname = tarfile.fileobj.read(self.size).decode(tarfile.encoding, tarfile.errors)
            self.size = 0

        return self

    def _round_word(self, offset: int) -> int:
        """Round ``offset`` up to the alignment used by the current cpio format."""
        if self.format in (FORMAT_CPIO_BIN, FORMAT_CPIO_HPBIN):
            # 2-byte alignment
            return (offset + 1) & ~0x01

        if self.format in (FORMAT_CPIO_NEWC, FORMAT_CPIO_CRC):
            # 4-byte alignment
            return (offset + 3) & ~0x03

        # The remaining formats are not padded
        return offset

    def issocket(self) -> bool:
        """Return True if it is a socket."""
        return self.type == stat.S_IFSOCK
|
|
191
|
+
|
|
192
|
+
|
|
193
|
+
def detect_header(fh: BinaryIO) -> int:
    """Peek at the next bytes of ``fh`` and return the matching ``FORMAT_CPIO_*`` identifier.

    The stream position is restored before returning. ``FORMAT_CPIO_UNKNOWN`` is returned
    when no known magic is recognized.
    """
    start = fh.tell()
    signature = fh.read(6)
    fh.seek(start)

    ascii_magics = (
        (b"070701", FORMAT_CPIO_NEWC),
        (b"070707", FORMAT_CPIO_ODC),
        (b"070702", FORMAT_CPIO_CRC),
    )
    for expected, fmt in ascii_magics:
        if signature == expected:
            return fmt

    # 0o070707 in little and big endian
    if signature[:2] in (b"\x71\xc7", b"\xc7\x71"):
        return FORMAT_CPIO_BIN

    return FORMAT_CPIO_UNKNOWN
|
|
211
|
+
|
|
212
|
+
|
|
213
|
+
def _swap16(value: int) -> int:
|
|
214
|
+
return ((value & 0xFF) << 8) | (value >> 8)
|
|
215
|
+
|
|
216
|
+
|
|
217
|
+
def CpioFile(*args, **kwargs) -> tarfile.TarFile:  # noqa: N802
    """Create a ``tarfile.TarFile`` that parses members with :class:`CpioInfo`.

    ``format`` defaults to ``FORMAT_CPIO_UNKNOWN`` unless the caller passes one.
    """
    if "format" not in kwargs:
        kwargs["format"] = FORMAT_CPIO_UNKNOWN
    return tarfile.TarFile(*args, tarinfo=CpioInfo, **kwargs)
|
|
221
|
+
|
|
222
|
+
|
|
223
|
+
def open(*args, **kwargs) -> tarfile.TarFile:
    """Call ``tarfile.open`` with :class:`CpioInfo` as the member class.

    ``format`` defaults to ``FORMAT_CPIO_UNKNOWN`` unless the caller passes one.
    """
    if "format" not in kwargs:
        kwargs["format"] = FORMAT_CPIO_UNKNOWN
    return tarfile.open(*args, tarinfo=CpioInfo, **kwargs)
|
|
File without changes
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
import codecs
|
|
2
|
+
|
|
3
|
+
|
|
4
|
+
def error_handler(error: Exception) -> tuple[str, int]:
    """Codec error handler that maps undecodable bytes to lone surrogates.

    Each offending byte ``b`` (which must be >= 128) becomes the code point ``0xDC00 + b``.

    Args:
        error: The exception raised by the codec.

    Returns:
        A tuple of the replacement string and the position at which decoding should resume.

    Raises:
        Exception: ``error`` itself, if it is not a ``UnicodeDecodeError`` or an offending byte is below 128.
    """
    if not isinstance(error, UnicodeDecodeError):
        raise error

    offending = [error.object[pos] for pos in range(error.start, error.end)]
    if any(value < 128 for value in offending):
        raise error

    replacement = "".join(chr(0xDC00 + value) for value in offending)
    return replacement, error.end
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
# Only register our handler when no error handler named "surrogateescape" exists yet.
try:
    codecs.lookup_error("surrogateescape")
except LookupError:
    codecs.register_error("surrogateescape", error_handler)
|
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
from dissect.util.hash import crc32c

# Keep a reference to the pure-Python implementation before ``crc32c`` is (possibly) rebound below.
crc32c_python = crc32c

# This selects between a native Rust version of crc32c (when available) and our own
# pure-Python implementation.
#
# Doing a:
#   from dissect.util.hash import crc32c
#
# in another project will automatically give you one or the other.
#
# The native Rust version is also available as dissect.util.hash.crc32c_native (``None`` when
# it is not available) and the pure Python version is always available as
# dissect.util.hash.crc32c_python.
try:
    from dissect.util import _native

    crc32c = crc32c_native = _native.hash.crc32c
except (ImportError, AttributeError):
    crc32c_native = None

__all__ = [
    "crc32",
    "crc32c",
    "crc32c_native",
    "crc32c_python",
    "jenkins",
]
|
|
@@ -0,0 +1,55 @@
|
|
|
1
|
+
import zlib
|
|
2
|
+
from functools import lru_cache
|
|
3
|
+
|
|
4
|
+
|
|
5
|
+
@lru_cache(maxsize=32)
|
|
6
|
+
def _table(polynomial: int) -> tuple[int, ...]:
|
|
7
|
+
"""Generate a CRC32 table for a given (reversed) polynomial.
|
|
8
|
+
|
|
9
|
+
Args:
|
|
10
|
+
polynomial: The (reversed) polynomial to use for the CRC32 calculation.
|
|
11
|
+
"""
|
|
12
|
+
table = []
|
|
13
|
+
for i in range(256):
|
|
14
|
+
crc = i
|
|
15
|
+
for _ in range(8):
|
|
16
|
+
if (crc & 1) != 0:
|
|
17
|
+
crc = (crc >> 1) ^ polynomial
|
|
18
|
+
else:
|
|
19
|
+
crc >>= 1
|
|
20
|
+
crc &= 0xFFFFFFFF
|
|
21
|
+
table.append(crc)
|
|
22
|
+
return tuple(table)
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
def update(crc: int, data: bytes, polynomial: int = 0xEDB88320, table: tuple[int, ...] | None = None) -> int:
    """Feed ``data`` into a running CRC32 checksum and return the new checksum.

    With the default polynomial and no explicit table the work is delegated to ``zlib.crc32``.

    Args:
        crc: The initial value of the checksum.
        data: The data to update the checksum with.
        polynomial: The (reversed) polynomial to use for the CRC32 calculation. Default is 0xEDB88320 (crc32b).
        table: Optional precomputed CRC32 table. If None, a table will be generated using the given polynomial.
    """
    if table is None:
        if polynomial == 0xEDB88320:
            return zlib.crc32(data, crc)
        table = _table(polynomial)

    state = crc ^ 0xFFFFFFFF
    for octet in data:
        state = ((state >> 8) & 0xFFFFFFFF) ^ table[(state ^ octet) & 0xFF]
    return state ^ 0xFFFFFFFF
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
def crc32(data: bytes, value: int = 0, polynomial: int = 0xEDB88320, table: tuple[int, ...] | None = None) -> int:
    """Return the CRC32 checksum of ``data`` as an unsigned 32-bit integer.

    Args:
        data: The data to calculate the checksum of.
        value: The initial value of the checksum. Default is 0.
        polynomial: The (reversed) polynomial to use for the CRC32 calculation. Default is 0xEDB88320 (crc32b).
        table: Optional precomputed CRC32 table. If None, a table will be generated using the given polynomial.
    """
    checksum = update(value, data, polynomial, table)
    return checksum & 0xFFFFFFFF
|
|
@@ -0,0 +1,60 @@
|
|
|
1
|
+
# 256-entry lookup table (one entry per byte value) used by the CRC32C ``update`` below.
# NOTE(review): presumably generated from the reflected Castagnoli polynomial 0x82F63B78
# (the value found at index 128) - confirm before regenerating.
# fmt: off
_TABLE = (
    0x00000000, 0xf26b8303, 0xe13b70f7, 0x1350f3f4, 0xc79a971f, 0x35f1141c, 0x26a1e7e8, 0xd4ca64eb,
    0x8ad958cf, 0x78b2dbcc, 0x6be22838, 0x9989ab3b, 0x4d43cfd0, 0xbf284cd3, 0xac78bf27, 0x5e133c24,
    0x105ec76f, 0xe235446c, 0xf165b798, 0x030e349b, 0xd7c45070, 0x25afd373, 0x36ff2087, 0xc494a384,
    0x9a879fa0, 0x68ec1ca3, 0x7bbcef57, 0x89d76c54, 0x5d1d08bf, 0xaf768bbc, 0xbc267848, 0x4e4dfb4b,
    0x20bd8ede, 0xd2d60ddd, 0xc186fe29, 0x33ed7d2a, 0xe72719c1, 0x154c9ac2, 0x061c6936, 0xf477ea35,
    0xaa64d611, 0x580f5512, 0x4b5fa6e6, 0xb93425e5, 0x6dfe410e, 0x9f95c20d, 0x8cc531f9, 0x7eaeb2fa,
    0x30e349b1, 0xc288cab2, 0xd1d83946, 0x23b3ba45, 0xf779deae, 0x05125dad, 0x1642ae59, 0xe4292d5a,
    0xba3a117e, 0x4851927d, 0x5b016189, 0xa96ae28a, 0x7da08661, 0x8fcb0562, 0x9c9bf696, 0x6ef07595,
    0x417b1dbc, 0xb3109ebf, 0xa0406d4b, 0x522bee48, 0x86e18aa3, 0x748a09a0, 0x67dafa54, 0x95b17957,
    0xcba24573, 0x39c9c670, 0x2a993584, 0xd8f2b687, 0x0c38d26c, 0xfe53516f, 0xed03a29b, 0x1f682198,
    0x5125dad3, 0xa34e59d0, 0xb01eaa24, 0x42752927, 0x96bf4dcc, 0x64d4cecf, 0x77843d3b, 0x85efbe38,
    0xdbfc821c, 0x2997011f, 0x3ac7f2eb, 0xc8ac71e8, 0x1c661503, 0xee0d9600, 0xfd5d65f4, 0x0f36e6f7,
    0x61c69362, 0x93ad1061, 0x80fde395, 0x72966096, 0xa65c047d, 0x5437877e, 0x4767748a, 0xb50cf789,
    0xeb1fcbad, 0x197448ae, 0x0a24bb5a, 0xf84f3859, 0x2c855cb2, 0xdeeedfb1, 0xcdbe2c45, 0x3fd5af46,
    0x7198540d, 0x83f3d70e, 0x90a324fa, 0x62c8a7f9, 0xb602c312, 0x44694011, 0x5739b3e5, 0xa55230e6,
    0xfb410cc2, 0x092a8fc1, 0x1a7a7c35, 0xe811ff36, 0x3cdb9bdd, 0xceb018de, 0xdde0eb2a, 0x2f8b6829,
    0x82f63b78, 0x709db87b, 0x63cd4b8f, 0x91a6c88c, 0x456cac67, 0xb7072f64, 0xa457dc90, 0x563c5f93,
    0x082f63b7, 0xfa44e0b4, 0xe9141340, 0x1b7f9043, 0xcfb5f4a8, 0x3dde77ab, 0x2e8e845f, 0xdce5075c,
    0x92a8fc17, 0x60c37f14, 0x73938ce0, 0x81f80fe3, 0x55326b08, 0xa759e80b, 0xb4091bff, 0x466298fc,
    0x1871a4d8, 0xea1a27db, 0xf94ad42f, 0x0b21572c, 0xdfeb33c7, 0x2d80b0c4, 0x3ed04330, 0xccbbc033,
    0xa24bb5a6, 0x502036a5, 0x4370c551, 0xb11b4652, 0x65d122b9, 0x97baa1ba, 0x84ea524e, 0x7681d14d,
    0x2892ed69, 0xdaf96e6a, 0xc9a99d9e, 0x3bc21e9d, 0xef087a76, 0x1d63f975, 0x0e330a81, 0xfc588982,
    0xb21572c9, 0x407ef1ca, 0x532e023e, 0xa145813d, 0x758fe5d6, 0x87e466d5, 0x94b49521, 0x66df1622,
    0x38cc2a06, 0xcaa7a905, 0xd9f75af1, 0x2b9cd9f2, 0xff56bd19, 0x0d3d3e1a, 0x1e6dcdee, 0xec064eed,
    0xc38d26c4, 0x31e6a5c7, 0x22b65633, 0xd0ddd530, 0x0417b1db, 0xf67c32d8, 0xe52cc12c, 0x1747422f,
    0x49547e0b, 0xbb3ffd08, 0xa86f0efc, 0x5a048dff, 0x8ecee914, 0x7ca56a17, 0x6ff599e3, 0x9d9e1ae0,
    0xd3d3e1ab, 0x21b862a8, 0x32e8915c, 0xc083125f, 0x144976b4, 0xe622f5b7, 0xf5720643, 0x07198540,
    0x590ab964, 0xab613a67, 0xb831c993, 0x4a5a4a90, 0x9e902e7b, 0x6cfbad78, 0x7fab5e8c, 0x8dc0dd8f,
    0xe330a81a, 0x115b2b19, 0x020bd8ed, 0xf0605bee, 0x24aa3f05, 0xd6c1bc06, 0xc5914ff2, 0x37faccf1,
    0x69e9f0d5, 0x9b8273d6, 0x88d28022, 0x7ab90321, 0xae7367ca, 0x5c18e4c9, 0x4f48173d, 0xbd23943e,
    0xf36e6f75, 0x0105ec76, 0x12551f82, 0xe03e9c81, 0x34f4f86a, 0xc69f7b69, 0xd5cf889d, 0x27a40b9e,
    0x79b737ba, 0x8bdcb4b9, 0x988c474d, 0x6ae7c44e, 0xbe2da0a5, 0x4c4623a6, 0x5f16d052, 0xad7d5351,
)
# fmt: on
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
def update(crc: int, data: bytes) -> int:
    """Feed ``data`` into a running CRC32C checksum and return the new checksum.

    Args:
        crc: The initial value of the checksum.
        data: The data to update the checksum with.
    """
    state = crc ^ 0xFFFFFFFF
    for octet in data:
        state = ((state >> 8) & 0xFFFFFFFF) ^ _TABLE[(state ^ octet) & 0xFF]
    return state ^ 0xFFFFFFFF
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
def crc32c(data: bytes, value: int = 0) -> int:
    """Return the CRC32C checksum of ``data`` as an unsigned 32-bit integer.

    Args:
        data: The data to calculate the checksum of.
        value: The initial value of the checksum. Default is 0.
    """
    checksum = update(value, data)
    return checksum & 0xFFFFFFFF
|
|
@@ -0,0 +1,102 @@
|
|
|
1
|
+
from struct import unpack
|
|
2
|
+
|
|
3
|
+
|
|
4
|
+
def _mix64(a: int, b: int, c: int) -> int:
|
|
5
|
+
"""Mixes three 64-bit values reversibly."""
|
|
6
|
+
# Implement logical right shift by masking first
|
|
7
|
+
a = (a - b - c) ^ ((c & 0xFFFFFFFFFFFFFFFF) >> 43)
|
|
8
|
+
b = (b - c - a) ^ (a << 9)
|
|
9
|
+
c = (c - a - b) ^ ((b & 0xFFFFFFFFFFFFFFFF) >> 8)
|
|
10
|
+
a = (a - b - c) ^ ((c & 0xFFFFFFFFFFFFFFFF) >> 38)
|
|
11
|
+
b = (b - c - a) ^ (a << 23)
|
|
12
|
+
c = (c - a - b) ^ ((b & 0xFFFFFFFFFFFFFFFF) >> 5)
|
|
13
|
+
a = (a - b - c) ^ ((c & 0xFFFFFFFFFFFFFFFF) >> 35)
|
|
14
|
+
b = (b - c - a) ^ (a << 49)
|
|
15
|
+
c = (c - a - b) ^ ((b & 0xFFFFFFFFFFFFFFFF) >> 11)
|
|
16
|
+
a = (a - b - c) ^ ((c & 0xFFFFFFFFFFFFFFFF) >> 12)
|
|
17
|
+
b = (b - c - a) ^ (a << 18)
|
|
18
|
+
c = (c - a - b) ^ ((b & 0xFFFFFFFFFFFFFFFF) >> 22)
|
|
19
|
+
|
|
20
|
+
# Normalize to 64 bits
|
|
21
|
+
return a & 0xFFFFFFFFFFFFFFFF, b & 0xFFFFFFFFFFFFFFFF, c & 0xFFFFFFFFFFFFFFFF
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
def lookup8(key: bytes, level: int) -> int:
    """Hash a variable-length key into a 64-bit value.

    This hash function is used in the ESXi kernel.

    Args:
        key: The key to hash.
        level: The initial value of the first two mixing registers.

    References:
        - http://burtleburtle.net/bob/c/lookup8.c
    """
    length = len(key)
    a = level
    b = level
    c = 0x9E3779B97F4A7C13  # Golden ratio, arbitrary value

    # Mix every complete 24-byte chunk as three little-endian 64-bit words
    tail_start = length - length % 24
    for start in range(0, tail_start, 24):
        a += int.from_bytes(key[start : start + 8], "little")
        b += int.from_bytes(key[start + 8 : start + 16], "little")
        c += int.from_bytes(key[start + 16 : start + 24], "little")
        a, b, c = _mix64(a, b, c)

    # Fold in the key length and the remaining (at most 23) bytes
    tail = key[tail_start:]
    c += length
    a += int.from_bytes(tail[:8], "little")
    b += int.from_bytes(tail[8:16], "little")
    # c takes 23 - 8 - 8 = 7 bytes (length is added to LSB)
    c += int.from_bytes(tail[16:], "little") << 8

    return _mix64(a, b, c)[2]
|
|
61
|
+
|
|
62
|
+
|
|
63
|
+
def lookup8_quads(key: bytes, level: int) -> int:
    """Hash a key made up of little-endian 64-bit integers into a 64-bit value.

    This hash function is used in the ESXi kernel, but unlike :func:`lookup8`, this variant is not compatible with
    any of the original ``lookup8.c`` implementations. The difference between this variant and :func:`lookup8`
    is that in the final step, the value of ``c`` is incremented by the number of quads, not the number
    of bytes in the key. While ``hash2`` in ``lookup8.c`` is also optimized for 64-bit aligned keys,
    (and uses the number of quads as argument for the key size, not bytes) it uses the length of the key
    in bytes to increment ``c`` in the final step.

    Args:
        key: The key to hash; its length must be a multiple of 8 bytes for ``struct.unpack`` to accept it.
        level: The initial value of the first two mixing registers.

    References:
        - http://burtleburtle.net/bob/c/lookup8.c
        - ``HashFunc_HashQuads``
    """
    count = len(key) // 8
    words = unpack(f"<{count}Q", key)

    a = level
    b = level
    c = 0x9E3779B97F4A7C13  # Golden ratio, arbitrary value

    # Mix the quads three at a time
    pos = 0
    while count - pos > 2:
        a += words[pos]
        b += words[pos + 1]
        c += words[pos + 2]
        a, b, c = _mix64(a, b, c)
        pos += 3

    # This is the main difference from lookup8:
    # c is incremented by the number of quads, not the length of the key.
    c += count

    # Zero, one or two quads are left over at this point
    leftover = count - pos
    if leftover >= 1:
        a += words[pos]
    if leftover == 2:
        b += words[pos + 1]

    return _mix64(a, b, c)[2]
|