dissect.util 3.24.dev4__cp314-cp314t-musllinux_1_2_aarch64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (46) hide show
  1. dissect/util/__init__.py +20 -0
  2. dissect/util/_build.py +17 -0
  3. dissect/util/_native/__init__.pyi +3 -0
  4. dissect/util/_native/compression/__init__.pyi +3 -0
  5. dissect/util/_native/compression/lz4.pyi +7 -0
  6. dissect/util/_native/compression/lzo.pyi +3 -0
  7. dissect/util/_native/hash/__init__.py +3 -0
  8. dissect/util/_native/hash/crc32c.py +2 -0
  9. dissect/util/_native.cpython-314t-aarch64-linux-musl.so +0 -0
  10. dissect/util/compression/__init__.py +45 -0
  11. dissect/util/compression/lz4.py +95 -0
  12. dissect/util/compression/lzbitmap.py +130 -0
  13. dissect/util/compression/lzfse.py +467 -0
  14. dissect/util/compression/lznt1.py +92 -0
  15. dissect/util/compression/lzo.py +118 -0
  16. dissect/util/compression/lzvn.py +241 -0
  17. dissect/util/compression/lzxpress.py +80 -0
  18. dissect/util/compression/lzxpress_huffman.py +184 -0
  19. dissect/util/compression/sevenbit.py +77 -0
  20. dissect/util/compression/snappy.py +86 -0
  21. dissect/util/compression/xz.py +112 -0
  22. dissect/util/cpio.py +226 -0
  23. dissect/util/encoding/__init__.py +0 -0
  24. dissect/util/encoding/surrogateescape.py +21 -0
  25. dissect/util/exceptions.py +6 -0
  26. dissect/util/hash/__init__.py +28 -0
  27. dissect/util/hash/crc32.py +55 -0
  28. dissect/util/hash/crc32c.py +60 -0
  29. dissect/util/hash/jenkins.py +102 -0
  30. dissect/util/ldap.py +237 -0
  31. dissect/util/plist.py +156 -0
  32. dissect/util/sid.py +81 -0
  33. dissect/util/stream.py +772 -0
  34. dissect/util/tools/__init__.py +0 -0
  35. dissect/util/tools/dump_nskeyedarchiver.py +61 -0
  36. dissect/util/ts.py +295 -0
  37. dissect/util/xmemoryview.py +117 -0
  38. dissect_util-3.24.dev4.dist-info/METADATA +89 -0
  39. dissect_util-3.24.dev4.dist-info/RECORD +46 -0
  40. dissect_util-3.24.dev4.dist-info/WHEEL +5 -0
  41. dissect_util-3.24.dev4.dist-info/entry_points.txt +2 -0
  42. dissect_util-3.24.dev4.dist-info/licenses/COPYRIGHT +5 -0
  43. dissect_util-3.24.dev4.dist-info/licenses/LICENSE +201 -0
  44. dissect_util-3.24.dev4.dist-info/sboms/auditwheel.cdx.json +1 -0
  45. dissect_util-3.24.dev4.dist-info/top_level.txt +1 -0
  46. dissect_util.libs/libgcc_s-2d945d6c.so.1 +0 -0
@@ -0,0 +1,112 @@
1
+ import io
2
+ from binascii import crc32
3
+ from typing import BinaryIO
4
+
5
+ from dissect.util.stream import OverlayStream
6
+
7
# Size in bytes of both the stream header and the stream footer that are read below.
HEADER_FOOTER_SIZE = 12
# Size in bytes of every CRC32 field that gets rewritten.
CRC_SIZE = 4


def repair_checksum(fh: BinaryIO) -> BinaryIO:
    """Repair CRC32 checksums for all headers in an XZ stream.

    FortiOS XZ files have (on purpose) corrupt streams which they read using a modified ``xz`` binary.
    The only thing changed are the CRC32 checksums, so partially parse the XZ file and fix all of them.

    The input is never written to. Every recomputed checksum is registered on an ``OverlayStream``
    created over ``fh``, and that stream is returned.

    References:
        - https://tukaani.org/xz/xz-file-format-1.1.0.txt
        - https://github.com/Rogdham/python-xz

    Args:
        fh: A file-like object of an LZMA stream to repair. It must be seekable, since the footer and index
            are located relative to the end of the file.

    Returns:
        The ``OverlayStream`` wrapping ``fh`` with the corrected stream header, stream footer, index and
        block header checksums added to it.

    Raises:
        ValueError: If the header or footer magic does not match, if an index record is missing or zero,
            or (via ``_mbi``) if a multibyte integer in the index is not terminated.
    """
    file_size = fh.seek(0, io.SEEK_END)
    # NOTE(review): presumably ``add(offset, data)`` makes reads of the overlay return ``data`` at ``offset``
    # instead of the underlying bytes - confirm against dissect.util.stream.OverlayStream.
    repaired = OverlayStream(fh, file_size)
    fh.seek(0)

    header = fh.read(HEADER_FOOTER_SIZE)
    # Check header magic
    magic = b"\xfd7zXZ\x00"
    if header[: len(magic)] != magic:
        raise ValueError("Not an XZ file")

    # Add correct header CRC32
    # The checksum covers the bytes between the magic and the CRC field, and is stored in the last
    # CRC_SIZE bytes of the header (the file position is right after the header at this point).
    repaired.add(fh.tell() - CRC_SIZE, _crc32(header[len(magic) : HEADER_FOOTER_SIZE - CRC_SIZE]))

    footer_offset = fh.seek(-HEADER_FOOTER_SIZE, io.SEEK_END)
    footer = fh.read(HEADER_FOOTER_SIZE)

    # Check footer magic
    footer_magic = b"YZ"
    if footer[HEADER_FOOTER_SIZE - len(footer_magic) : HEADER_FOOTER_SIZE] != footer_magic:
        raise ValueError("Not an XZ file")

    # Add correct footer CRC32
    # In the footer the CRC field comes first; it covers everything between itself and the footer magic.
    repaired.add(footer_offset, _crc32(footer[CRC_SIZE : HEADER_FOOTER_SIZE - len(footer_magic)]))

    # The footer stores the index size as ``(size / 4) - 1`` in a little endian 32-bit field.
    backward_size = (int.from_bytes(footer[4:8], "little") + 1) * 4
    fh.seek(-HEADER_FOOTER_SIZE - backward_size, io.SEEK_END)
    index = fh.read(backward_size)

    # Add correct index CRC32
    # The index checksum is stored in its last CRC_SIZE bytes and covers everything before it.
    repaired.add(fh.tell() - CRC_SIZE, _crc32(index[:-CRC_SIZE]))

    # Parse the index
    # Skip the first byte of the index, then read the record count as a multibyte integer.
    isize, num_records = _mbi(index[1:])
    # Keep only the record data: drop the leading byte, the record count and the trailing CRC32.
    index = index[1 + isize : -4]
    records = []
    for _ in range(num_records):
        if not index:
            raise ValueError("Missing index size")

        isize, unpadded_size = _mbi(index)
        if not unpadded_size:
            raise ValueError("Missing index record unpadded size")

        index = index[isize:]
        if not index:
            raise ValueError("Missing index size")

        isize, uncompressed_size = _mbi(index)
        if not uncompressed_size:
            raise ValueError("Missing index record uncompressed size")

        index = index[isize:]
        records.append((unpadded_size, uncompressed_size))

    # The blocks sit directly in front of the index, so walk back from the index start by the total
    # block length. Each block occupies its unpadded size rounded up to a multiple of four bytes.
    block_start = file_size - HEADER_FOOTER_SIZE - backward_size
    blocks_len = sum((unpadded_size + 3) & ~3 for unpadded_size, _ in records)
    block_start -= blocks_len

    # Iterate over all blocks and add the correct block header CRC32
    for unpadded_size, _ in records:
        fh.seek(block_start)

        # The first byte of a block header encodes the header size as ``(size / 4) - 1``.
        block_header = fh.read(1)
        block_header_size = (block_header[0] + 1) * 4
        block_header += fh.read(block_header_size - 1)
        # The block header checksum is stored in its last CRC_SIZE bytes and covers everything before it.
        repaired.add(fh.tell() - CRC_SIZE, _crc32(block_header[:-CRC_SIZE]))

        block_start += (unpadded_size + 3) & ~3

    return repaired
94
+
95
+
96
def _mbi(data: bytes) -> tuple[int, int]:
    """Decode a multibyte integer from the start of ``data``.

    Each byte contributes its 7 low bits to the result, least significant group first. A set high bit
    means another byte follows; the first byte with a clear high bit ends the integer.

    Args:
        data: The bytes to decode from. Bytes after the end of the integer are ignored.

    Returns:
        A tuple of the number of bytes consumed and the decoded value.

    Raises:
        ValueError: If ``data`` runs out before a terminating byte is seen (this includes empty input).
    """
    decoded = 0
    shift = 0
    consumed = 0
    for octet in data:
        consumed += 1
        decoded |= (octet & 0x7F) << shift
        if (octet & 0x80) == 0:
            return consumed, decoded
        shift += 7

    raise ValueError("Invalid mbi")
109
+
110
+
111
def _crc32(data: bytes) -> bytes:
    """Return the CRC32 of ``data`` as ``CRC_SIZE`` little endian bytes."""
    checksum = crc32(data)
    return checksum.to_bytes(CRC_SIZE, "little")
dissect/util/cpio.py ADDED
@@ -0,0 +1,226 @@
1
+ from __future__ import annotations
2
+
3
+ import stat
4
+ import struct
5
+ import tarfile
6
+ from tarfile import InvalidHeaderError
7
+ from typing import BinaryIO
8
+
9
# Identifiers for the supported cpio flavours, stored in the ``format`` attribute of the ``TarFile``.
FORMAT_CPIO_BIN = 10
FORMAT_CPIO_ODC = 11
FORMAT_CPIO_NEWC = 12
FORMAT_CPIO_CRC = 13
FORMAT_CPIO_HPBIN = 16
FORMAT_CPIO_HPODC = 17
FORMAT_CPIO_UNKNOWN = 18

# Header magic values as integers (the ASCII formats store them as octal text).
CPIO_MAGIC_OLD = 0o070707
CPIO_MAGIC_NEW = 0o070701
CPIO_MAGIC_CRC = 0o070702

# Maps the file type bits of a cpio mode to the matching ``tarfile`` type flag.
TYPE_MAP = {
    stat.S_IFREG: tarfile.REGTYPE,
    stat.S_IFDIR: tarfile.DIRTYPE,
    stat.S_IFIFO: tarfile.FIFOTYPE,
    stat.S_IFLNK: tarfile.SYMTYPE,
    stat.S_IFCHR: tarfile.CHRTYPE,
    stat.S_IFBLK: tarfile.BLKTYPE,
}


class CpioInfo(tarfile.TarInfo):
    """Custom ``TarInfo`` implementation for reading cpio archives.

    Examples::

        tarfile.open(..., tarinfo=CpioInfo)
        # or
        tarfile.TarFile(..., tarinfo=CpioInfo)

    """

    @classmethod
    def fromtarfile(cls, tarfile: tarfile.TarFile) -> CpioInfo | None:
        """Read the next cpio member from the current position of ``tarfile.fileobj``.

        If ``tarfile.format`` is not one of the known cpio formats it is detected from the stream first
        and stored back on ``tarfile``.

        Returns:
            The parsed member, or ``None`` when the ``TRAILER!!!`` end marker was read.

        Raises:
            InvalidHeaderError: If the cpio format could not be determined or the header magic is wrong.
        """
        if tarfile.format not in (
            FORMAT_CPIO_BIN,
            FORMAT_CPIO_ODC,
            FORMAT_CPIO_NEWC,
            FORMAT_CPIO_CRC,
            FORMAT_CPIO_HPBIN,
            FORMAT_CPIO_HPODC,
        ):
            tarfile.format = detect_header(tarfile.fileobj)

        # Every flavour has its own fixed header size
        if tarfile.format in (FORMAT_CPIO_BIN, FORMAT_CPIO_HPBIN):
            buf = tarfile.fileobj.read(26)
        elif tarfile.format in (FORMAT_CPIO_ODC, FORMAT_CPIO_HPODC):
            buf = tarfile.fileobj.read(76)
        elif tarfile.format in (FORMAT_CPIO_NEWC, FORMAT_CPIO_CRC):
            buf = tarfile.fileobj.read(110)
        else:
            raise InvalidHeaderError("Unknown cpio type")

        obj = cls.frombuf(buf, tarfile.format, tarfile.encoding, tarfile.errors)
        obj.format = tarfile.format
        obj.offset = tarfile.fileobj.tell() - len(buf)
        return obj._proc_member(tarfile)

    @classmethod
    def frombuf(cls, buf: bytes, format: int, encoding: str, errors: str) -> CpioInfo:
        """Parse a cpio header from ``buf``.

        Args:
            buf: The raw header bytes (26, 76 or 110 bytes depending on ``format``).
            format: One of the ``FORMAT_CPIO_*`` constants.
            encoding: Unused here, kept for parity with the ``fromtarfile`` call.
            errors: Unused here, kept for parity with the ``fromtarfile`` call.

        Raises:
            InvalidHeaderError: If ``format`` is not a known cpio format or the header magic is wrong.
        """
        if format in (FORMAT_CPIO_BIN, FORMAT_CPIO_ODC, FORMAT_CPIO_HPBIN, FORMAT_CPIO_HPODC):
            obj = cls._old_frombuf(buf, format)
        elif format in (FORMAT_CPIO_NEWC, FORMAT_CPIO_CRC):
            obj = cls._new_frombuf(buf, format)
        else:
            # Without this branch ``obj`` would be unbound and surface as an UnboundLocalError
            raise InvalidHeaderError("Unknown cpio type")

        # Common postprocessing: split the raw cpio mode into a tarfile type and permission bits
        ftype = stat.S_IFMT(obj._mode)
        obj.type = TYPE_MAP.get(ftype, ftype)
        obj.mode = stat.S_IMODE(obj._mode)

        return obj

    @classmethod
    def _old_frombuf(cls, buf: bytes, format: int) -> CpioInfo:
        """Parse an old binary (26 byte) or old ASCII (76 byte) cpio header."""
        if format in (FORMAT_CPIO_BIN, FORMAT_CPIO_HPBIN):
            values = list(struct.unpack("<13H", buf))
            if values[0] == _swap16(CPIO_MAGIC_OLD):
                # The archive was written with the opposite byte order, swap every 16-bit field
                values = [_swap16(v) for v in values]

            # mtime and size are stored as two 16-bit halves (most significant first); merge them so
            # the list has the same layout as the ASCII variant
            mtime = (values.pop(8) << 16) | values.pop(8)
            size = (values.pop(9) << 16) | values.pop(9)
            values.insert(8, mtime)
            values.append(size)
        else:
            values = [int(v, 8) for v in struct.unpack("<6s6s6s6s6s6s6s6s11s6s11s", buf)]

        if values[0] != CPIO_MAGIC_OLD:
            raise InvalidHeaderError(f"Invalid (old) ASCII/binary cpio header magic: {oct(values[0])}")

        obj = cls()
        obj.devmajor = values[1] >> 8
        obj.devminor = values[1] & 0xFF
        obj._mode = values[3]
        obj.uid = values[4]
        obj.gid = values[5]
        obj.mtime = values[8]
        obj.size = values[10]

        # Extra fields
        obj.magic = values[0]
        obj.ino = values[2]
        obj.nlink = values[6]
        obj.rdevmajor = values[7] >> 8
        obj.rdevminor = values[7] & 0xFF
        obj.namesize = values[9]

        # This is a specific case for HP/UX cpio archives, which I'll let this comment from the original source explain:
        # HP/UX cpio creates archives that look just like ordinary archives,
        # but for devices it sets major = 0, minor = 1, and puts the
        # actual major/minor number in the filesize field. See if this
        # is an HP/UX cpio archive, and if so fix it. We have to do this
        # here because process_copy_in() assumes filesize is always 0
        # for devices.
        #
        # The file type must be taken from the raw ``_mode``: ``obj.mode`` is only assigned in ``frombuf``
        # and still holds the ``TarInfo`` default here, which has no file type bits set.
        if (
            stat.S_IFMT(obj._mode) in (stat.S_IFCHR, stat.S_IFBLK, stat.S_IFSOCK, stat.S_IFIFO)
            and obj.size != 0
            and obj.rdevmajor == 0
            and obj.rdevminor == 1
        ):
            obj.rdevmajor = (obj.size >> 8) & 0xFF
            obj.rdevminor = obj.size & 0xFF
            obj.size = 0

        return obj

    @classmethod
    def _new_frombuf(cls, buf: bytes, format: int) -> CpioInfo:
        """Parse a new ASCII (``newc``/``crc``, 110 byte) cpio header."""
        values = struct.unpack("<6s8s8s8s8s8s8s8s8s8s8s8s8s8s", buf)
        # The magic is octal text, every other field is 8 hexadecimal digits
        values = [int(values[0], 8)] + [int(v, 16) for v in values[1:]]
        if values[0] not in (CPIO_MAGIC_NEW, CPIO_MAGIC_CRC):
            raise InvalidHeaderError(f"Invalid (new) ASCII cpio header magic: {oct(values[0])}")

        obj = cls()
        obj._mode = values[2]
        obj.uid = values[3]
        obj.gid = values[4]
        obj.mtime = values[6]
        obj.size = values[7]
        obj.devmajor = values[8]
        obj.devminor = values[9]
        obj.chksum = values[13]

        # Extra fields
        obj.magic = values[0]
        obj.ino = values[1]
        obj.nlink = values[5]
        obj.rdevmajor = values[10]
        obj.rdevminor = values[11]
        obj.namesize = values[12]

        return obj

    def _proc_member(self, tarfile: tarfile.TarFile) -> CpioInfo | None:
        """Read the member name and compute the data and next-header offsets.

        Returns:
            ``self``, or ``None`` if this entry is the ``TRAILER!!!`` end-of-archive marker.
        """
        # ``namesize`` includes the terminating NUL byte, which is not part of the name
        self.name = tarfile.fileobj.read(self.namesize - 1).decode(tarfile.encoding, tarfile.errors)
        if self.name == "TRAILER!!!":
            # The last entry in a cpio file has the special name ``TRAILER!!!``, indicating the end of the archive
            return None

        # Skip the NUL terminator of the name, then apply the format specific padding
        offset = tarfile.fileobj.tell() + 1
        self.offset_data = self._round_word(offset)
        tarfile.offset = self._round_word(self.offset_data + self.size)

        if self.issym():
            # The link target is stored as the file data of the entry
            tarfile.fileobj.seek(self.offset_data)
            self.linkname = tarfile.fileobj.read(self.size).decode(tarfile.encoding, tarfile.errors)
            self.size = 0

        return self

    def _round_word(self, offset: int) -> int:
        """Round ``offset`` up to the alignment of this member's format (2, 4 or no alignment)."""
        if self.format in (FORMAT_CPIO_BIN, FORMAT_CPIO_HPBIN):
            return (offset + 1) & ~0x01

        if self.format in (FORMAT_CPIO_NEWC, FORMAT_CPIO_CRC):
            return (offset + 3) & ~0x03

        return offset

    def issocket(self) -> bool:
        """Return True if it is a socket."""
        # Sockets have no entry in TYPE_MAP, so ``type`` holds the raw ``stat.S_IFSOCK`` value
        return self.type == stat.S_IFSOCK
191
+
192
+
193
def detect_header(fh: BinaryIO) -> int:
    """Detect a cpio format on a file-like object.

    Peeks at the next 6 bytes of ``fh`` and restores the original position afterwards.

    Returns:
        The matching ``FORMAT_CPIO_*`` constant, or ``FORMAT_CPIO_UNKNOWN`` if no magic matches.
    """
    start = fh.tell()
    signature = fh.read(6)
    fh.seek(start)

    # ASCII formats carry the magic as six octal digits
    for ascii_magic, cpio_format in (
        (b"070701", FORMAT_CPIO_NEWC),
        (b"070707", FORMAT_CPIO_ODC),
        (b"070702", FORMAT_CPIO_CRC),
    ):
        if signature == ascii_magic:
            return cpio_format

    # 0o070707 in little and big endian
    if signature[:2] in (b"\x71\xc7", b"\xc7\x71"):
        return FORMAT_CPIO_BIN

    return FORMAT_CPIO_UNKNOWN
211
+
212
+
213
def _swap16(value: int) -> int:
    """Swap the two bytes of a 16-bit integer."""
    low_byte = value & 0xFF
    high_part = value >> 8
    return (low_byte << 8) | high_part
215
+
216
+
217
def CpioFile(*args, **kwargs) -> tarfile.TarFile:  # noqa: N802
    """Utility wrapper around ``tarfile.TarFile`` to easily open cpio archives.

    All arguments are forwarded to ``tarfile.TarFile``. ``format`` defaults to ``FORMAT_CPIO_UNKNOWN``
    unless given, and ``tarinfo`` is always set to ``CpioInfo``.
    """
    if "format" not in kwargs:
        kwargs["format"] = FORMAT_CPIO_UNKNOWN
    return tarfile.TarFile(*args, tarinfo=CpioInfo, **kwargs)
221
+
222
+
223
def open(*args, **kwargs) -> tarfile.TarFile:
    """Utility wrapper around ``tarfile.open`` to easily open cpio archives.

    All arguments are forwarded to ``tarfile.open``. ``format`` defaults to ``FORMAT_CPIO_UNKNOWN``
    unless given, and ``tarinfo`` is always set to ``CpioInfo``.
    """
    if "format" not in kwargs:
        kwargs["format"] = FORMAT_CPIO_UNKNOWN
    return tarfile.open(*args, tarinfo=CpioInfo, **kwargs)
File without changes
@@ -0,0 +1,21 @@
1
+ import codecs
2
+
3
+
4
def error_handler(error: Exception) -> tuple[str, int]:
    """Codec error handler that maps undecodable bytes to lone surrogates.

    Every byte in the failing range is replaced by the code point ``0xDC00 + byte``.

    Args:
        error: The exception passed in by the codec machinery.

    Returns:
        A tuple of the replacement string and the position at which decoding should continue.

    Raises:
        Exception: ``error`` itself is re-raised if it is not a ``UnicodeDecodeError`` or if one of the
            failing bytes is below 128.
    """
    if isinstance(error, UnicodeDecodeError):
        escaped = []
        for value in map(error.object.__getitem__, range(error.start, error.end)):
            if value < 0x80:
                raise error
            escaped.append(chr(value + 0xDC00))
        return "".join(escaped), error.end

    raise error


# Only install our handler when the interpreter does not already provide one under this name
try:
    codecs.lookup_error("surrogateescape")
except LookupError:
    codecs.register_error("surrogateescape", error_handler)
@@ -0,0 +1,6 @@
1
class Error(Exception):
    """Base class for the exceptions defined in this module."""


class CorruptDataError(Error):
    """An :class:`Error` subclass for corrupt data."""
@@ -0,0 +1,28 @@
1
+ from dissect.util.hash import crc32c
2
+
3
# Keep a reference to the pure-Python ``crc32c`` imported above before the name may be rebound below.
crc32c_python = crc32c

# This selects between a native Rust version of crc32c (when available) and our own
# pure-Python implementation.
#
# Doing a:
#   from dissect.util.hash import crc32c
#
# in another project will automatically give you one or the other.
#
# The native Rust version is also available as dissect.util.hash.crc32c_native (when available)
# and the pure Python version is always available as dissect.util.hash.crc32c_python.
try:
    from dissect.util import _native

    crc32c = crc32c_native = _native.hash.crc32c
except (ImportError, AttributeError):
    # Either the native extension could not be imported, or it does not expose ``hash.crc32c``.
    # ``crc32c`` then keeps pointing at the pure-Python import.
    crc32c_native = None

# NOTE(review): "crc32" and "jenkins" are not bound in the visible part of this module; presumably they
# are resolved as submodules of this package - confirm.
__all__ = [
    "crc32",
    "crc32c",
    "crc32c_native",
    "crc32c_python",
    "jenkins",
]
@@ -0,0 +1,55 @@
1
+ import zlib
2
+ from functools import lru_cache
3
+
4
+
5
@lru_cache(maxsize=32)
def _table(polynomial: int) -> tuple[int, ...]:
    """Build the 256-entry CRC32 lookup table for a (reversed) polynomial.

    Results are cached per polynomial.

    Args:
        polynomial: The (reversed) polynomial to use for the CRC32 calculation.

    Returns:
        A tuple with one 32-bit table entry for every possible byte value.
    """

    def entry(value: int) -> int:
        # Shift out one bit at a time, folding in the polynomial whenever a set bit drops off
        for _ in range(8):
            value = (value >> 1) ^ polynomial if value & 1 else value >> 1
        return value & 0xFFFFFFFF

    return tuple(entry(index) for index in range(256))
23
+
24
+
25
+ def update(crc: int, data: bytes, polynomial: int = 0xEDB88320, table: tuple[int, ...] | None = None) -> int:
26
+ """Update CRC32 checksum with data.
27
+
28
+ Args:
29
+ crc: The initial value of the checksum.
30
+ data: The data to update the checksum with.
31
+ polynomial: The (reversed) polynomial to use for the CRC32 calculation. Default is 0xEDB88320 (crc32b).
32
+ table: Optional precomputed CRC32 table. If None, a table will be generated using the given polynomial.
33
+ """
34
+ if polynomial == 0xEDB88320 and table is None:
35
+ return zlib.crc32(data, crc)
36
+
37
+ if table is None:
38
+ table = _table(polynomial)
39
+
40
+ crc = crc ^ 0xFFFFFFFF
41
+ for b in data:
42
+ crc = table[(crc ^ b) & 0xFF] ^ ((crc >> 8) & 0xFFFFFFFF)
43
+ return crc ^ 0xFFFFFFFF
44
+
45
+
46
def crc32(data: bytes, value: int = 0, polynomial: int = 0xEDB88320, table: tuple[int, ...] | None = None) -> int:
    """Calculate CRC32 checksum of some data, with an optional initial value and polynomial.

    Args:
        data: The data to calculate the checksum of.
        value: The initial value of the checksum. Default is 0.
        polynomial: The (reversed) polynomial to use for the CRC32 calculation. Default is 0xEDB88320 (crc32b).
        table: Optional precomputed CRC32 table. If None, a table will be generated using the given polynomial.

    Returns:
        The checksum, truncated to 32 bits.
    """
    checksum = update(value, data, polynomial, table)
    return checksum & 0xFFFFFFFF
@@ -0,0 +1,60 @@
1
# fmt: off
# Precomputed 256-entry CRC32C lookup table. ``update`` indexes it with the low byte of the running
# checksum XORed with the next data byte. Entry 128 (0x82f63b78) is the reversed CRC-32C (Castagnoli)
# polynomial.
_TABLE = (
    0x00000000, 0xf26b8303, 0xe13b70f7, 0x1350f3f4, 0xc79a971f, 0x35f1141c, 0x26a1e7e8, 0xd4ca64eb,
    0x8ad958cf, 0x78b2dbcc, 0x6be22838, 0x9989ab3b, 0x4d43cfd0, 0xbf284cd3, 0xac78bf27, 0x5e133c24,
    0x105ec76f, 0xe235446c, 0xf165b798, 0x030e349b, 0xd7c45070, 0x25afd373, 0x36ff2087, 0xc494a384,
    0x9a879fa0, 0x68ec1ca3, 0x7bbcef57, 0x89d76c54, 0x5d1d08bf, 0xaf768bbc, 0xbc267848, 0x4e4dfb4b,
    0x20bd8ede, 0xd2d60ddd, 0xc186fe29, 0x33ed7d2a, 0xe72719c1, 0x154c9ac2, 0x061c6936, 0xf477ea35,
    0xaa64d611, 0x580f5512, 0x4b5fa6e6, 0xb93425e5, 0x6dfe410e, 0x9f95c20d, 0x8cc531f9, 0x7eaeb2fa,
    0x30e349b1, 0xc288cab2, 0xd1d83946, 0x23b3ba45, 0xf779deae, 0x05125dad, 0x1642ae59, 0xe4292d5a,
    0xba3a117e, 0x4851927d, 0x5b016189, 0xa96ae28a, 0x7da08661, 0x8fcb0562, 0x9c9bf696, 0x6ef07595,
    0x417b1dbc, 0xb3109ebf, 0xa0406d4b, 0x522bee48, 0x86e18aa3, 0x748a09a0, 0x67dafa54, 0x95b17957,
    0xcba24573, 0x39c9c670, 0x2a993584, 0xd8f2b687, 0x0c38d26c, 0xfe53516f, 0xed03a29b, 0x1f682198,
    0x5125dad3, 0xa34e59d0, 0xb01eaa24, 0x42752927, 0x96bf4dcc, 0x64d4cecf, 0x77843d3b, 0x85efbe38,
    0xdbfc821c, 0x2997011f, 0x3ac7f2eb, 0xc8ac71e8, 0x1c661503, 0xee0d9600, 0xfd5d65f4, 0x0f36e6f7,
    0x61c69362, 0x93ad1061, 0x80fde395, 0x72966096, 0xa65c047d, 0x5437877e, 0x4767748a, 0xb50cf789,
    0xeb1fcbad, 0x197448ae, 0x0a24bb5a, 0xf84f3859, 0x2c855cb2, 0xdeeedfb1, 0xcdbe2c45, 0x3fd5af46,
    0x7198540d, 0x83f3d70e, 0x90a324fa, 0x62c8a7f9, 0xb602c312, 0x44694011, 0x5739b3e5, 0xa55230e6,
    0xfb410cc2, 0x092a8fc1, 0x1a7a7c35, 0xe811ff36, 0x3cdb9bdd, 0xceb018de, 0xdde0eb2a, 0x2f8b6829,
    0x82f63b78, 0x709db87b, 0x63cd4b8f, 0x91a6c88c, 0x456cac67, 0xb7072f64, 0xa457dc90, 0x563c5f93,
    0x082f63b7, 0xfa44e0b4, 0xe9141340, 0x1b7f9043, 0xcfb5f4a8, 0x3dde77ab, 0x2e8e845f, 0xdce5075c,
    0x92a8fc17, 0x60c37f14, 0x73938ce0, 0x81f80fe3, 0x55326b08, 0xa759e80b, 0xb4091bff, 0x466298fc,
    0x1871a4d8, 0xea1a27db, 0xf94ad42f, 0x0b21572c, 0xdfeb33c7, 0x2d80b0c4, 0x3ed04330, 0xccbbc033,
    0xa24bb5a6, 0x502036a5, 0x4370c551, 0xb11b4652, 0x65d122b9, 0x97baa1ba, 0x84ea524e, 0x7681d14d,
    0x2892ed69, 0xdaf96e6a, 0xc9a99d9e, 0x3bc21e9d, 0xef087a76, 0x1d63f975, 0x0e330a81, 0xfc588982,
    0xb21572c9, 0x407ef1ca, 0x532e023e, 0xa145813d, 0x758fe5d6, 0x87e466d5, 0x94b49521, 0x66df1622,
    0x38cc2a06, 0xcaa7a905, 0xd9f75af1, 0x2b9cd9f2, 0xff56bd19, 0x0d3d3e1a, 0x1e6dcdee, 0xec064eed,
    0xc38d26c4, 0x31e6a5c7, 0x22b65633, 0xd0ddd530, 0x0417b1db, 0xf67c32d8, 0xe52cc12c, 0x1747422f,
    0x49547e0b, 0xbb3ffd08, 0xa86f0efc, 0x5a048dff, 0x8ecee914, 0x7ca56a17, 0x6ff599e3, 0x9d9e1ae0,
    0xd3d3e1ab, 0x21b862a8, 0x32e8915c, 0xc083125f, 0x144976b4, 0xe622f5b7, 0xf5720643, 0x07198540,
    0x590ab964, 0xab613a67, 0xb831c993, 0x4a5a4a90, 0x9e902e7b, 0x6cfbad78, 0x7fab5e8c, 0x8dc0dd8f,
    0xe330a81a, 0x115b2b19, 0x020bd8ed, 0xf0605bee, 0x24aa3f05, 0xd6c1bc06, 0xc5914ff2, 0x37faccf1,
    0x69e9f0d5, 0x9b8273d6, 0x88d28022, 0x7ab90321, 0xae7367ca, 0x5c18e4c9, 0x4f48173d, 0xbd23943e,
    0xf36e6f75, 0x0105ec76, 0x12551f82, 0xe03e9c81, 0x34f4f86a, 0xc69f7b69, 0xd5cf889d, 0x27a40b9e,
    0x79b737ba, 0x8bdcb4b9, 0x988c474d, 0x6ae7c44e, 0xbe2da0a5, 0x4c4623a6, 0x5f16d052, 0xad7d5351,
)
# fmt: on
37
+
38
+
39
def update(crc: int, data: bytes) -> int:
    """Update CRC32C checksum with data.

    Args:
        crc: The initial value of the checksum.
        data: The data to update the checksum with.

    Returns:
        The updated checksum.
    """
    # The running state is kept inverted, and inverted back at the end
    state = crc ^ 0xFFFFFFFF
    for byte in data:
        state = ((state >> 8) & 0xFFFFFFFF) ^ _TABLE[(state ^ byte) & 0xFF]
    return state ^ 0xFFFFFFFF
51
+
52
+
53
def crc32c(data: bytes, value: int = 0) -> int:
    """Calculate CRC32C checksum of some data, with an optional initial value.

    Args:
        data: The data to calculate the checksum of.
        value: The initial value of the checksum. Default is 0.

    Returns:
        The checksum, truncated to 32 bits.
    """
    checksum = update(value, data)
    return checksum & 0xFFFFFFFF
@@ -0,0 +1,102 @@
1
+ from struct import unpack
2
+
3
+
4
def _mix64(a: int, b: int, c: int) -> tuple[int, int, int]:
    """Mixes three 64-bit values reversibly.

    The inputs may be wider than 64 bits (callers add to them without masking). Only the low 64 bits
    of each result are kept.

    Args:
        a: First value to mix.
        b: Second value to mix.
        c: Third value to mix.

    Returns:
        The mixed ``(a, b, c)``, each normalized to an unsigned 64-bit value.
    """
    mask = 0xFFFFFFFFFFFFFFFF

    # Implement logical right shift by masking first. Python integers are unbounded and may be
    # negative after the subtractions, so an unmasked ``>>`` would behave as an arithmetic shift.
    a = (a - b - c) ^ ((c & mask) >> 43)
    b = (b - c - a) ^ (a << 9)
    c = (c - a - b) ^ ((b & mask) >> 8)
    a = (a - b - c) ^ ((c & mask) >> 38)
    b = (b - c - a) ^ (a << 23)
    c = (c - a - b) ^ ((b & mask) >> 5)
    a = (a - b - c) ^ ((c & mask) >> 35)
    b = (b - c - a) ^ (a << 49)
    c = (c - a - b) ^ ((b & mask) >> 11)
    a = (a - b - c) ^ ((c & mask) >> 12)
    b = (b - c - a) ^ (a << 18)
    c = (c - a - b) ^ ((b & mask) >> 22)

    # Normalize to 64 bits
    return a & mask, b & mask, c & mask
22
+
23
+
24
def lookup8(key: bytes, level: int) -> int:
    """Hashes a variable-length key into a 64-bit value.

    This hash function is used in the ESXi kernel.

    References:
        - http://burtleburtle.net/bob/c/lookup8.c

    Args:
        key: The key to hash.
        level: The initial value for the internal ``a`` and ``b`` state.

    Returns:
        The 64-bit hash value.
    """
    length = len(key)
    a = b = level
    c = 0x9E3779B97F4A7C13  # Golden ratio, arbitrary value

    # Process the key in 24-byte chunks
    tail_start = length - length % 24
    for pos in range(0, tail_start, 24):
        a += int.from_bytes(key[pos : pos + 8], "little")
        b += int.from_bytes(key[pos + 8 : pos + 16], "little")
        c += int.from_bytes(key[pos + 16 : pos + 24], "little")
        a, b, c = _mix64(a, b, c)

    # Handle the last (at most 23) bytes
    tail = key[tail_start:]
    c += length
    a += int.from_bytes(tail[:8], "little")
    b += int.from_bytes(tail[8:16], "little")
    # c takes 23 - 8 - 8 = 7 bytes, shifted up one byte (length is added to LSB)
    c += int.from_bytes(tail[16:], "little") << 8

    return _mix64(a, b, c)[2]
61
+
62
+
63
def lookup8_quads(key: bytes, level: int) -> int:
    """Hashes a key consisting of 64-bit integers into a 64-bit value.

    This hash function is used in the ESXi kernel, but unlike :func:`lookup8`, this variant is not compatible with
    any of the original ``lookup8.c`` implementations. The difference between this variant and :func:`lookup8`
    is that in the final step, the value of ``c`` is incremented by the number of quads, not the number
    of bytes in the key. While ``hash2`` in ``lookup8.c`` is also optimized for 64-bit aligned keys,
    (and uses the number of quads as argument for the key size, not bytes) it uses the length of the key
    in bytes to increment ``c`` in the final step.

    References:
        - http://burtleburtle.net/bob/c/lookup8.c
        - ``HashFunc_HashQuads``

    Args:
        key: The key to hash, interpreted as little endian 64-bit integers.
        level: The initial value for the internal ``a`` and ``b`` state.

    Returns:
        The 64-bit hash value.
    """
    count = len(key) // 8
    words = unpack(f"<{count}Q", key)

    a = b = level
    c = 0x9E3779B97F4A7C13  # Golden ratio, arbitrary value

    # Mix in the quads three at a time
    leftover = count % 3
    full_end = count - leftover
    for idx in range(0, full_end, 3):
        a += words[idx]
        b += words[idx + 1]
        c += words[idx + 2]
        a, b, c = _mix64(a, b, c)

    # This is the main difference from lookup8:
    # c is incremented by the number of quads, not the length of the key.
    c += count

    # Up to two quads may remain: the first goes into a, the second into b
    tail = words[full_end:]
    if leftover >= 1:
        a += tail[0]
    if leftover == 2:
        b += tail[1]

    return _mix64(a, b, c)[2]