dissect.util 3.24.dev1__cp310-abi3-manylinux_2_28_s390x.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of dissect.util might be problematic. Click here for more details.

Files changed (43) hide show
  1. dissect/util/__init__.py +20 -0
  2. dissect/util/_build.py +17 -0
  3. dissect/util/_native/__init__.pyi +3 -0
  4. dissect/util/_native/compression/__init__.pyi +3 -0
  5. dissect/util/_native/compression/lz4.pyi +7 -0
  6. dissect/util/_native/compression/lzo.pyi +3 -0
  7. dissect/util/_native/hash/__init__.py +3 -0
  8. dissect/util/_native/hash/crc32c.py +2 -0
  9. dissect/util/_native.abi3.so +0 -0
  10. dissect/util/compression/__init__.py +45 -0
  11. dissect/util/compression/lz4.py +95 -0
  12. dissect/util/compression/lzbitmap.py +130 -0
  13. dissect/util/compression/lzfse.py +467 -0
  14. dissect/util/compression/lznt1.py +92 -0
  15. dissect/util/compression/lzo.py +118 -0
  16. dissect/util/compression/lzvn.py +241 -0
  17. dissect/util/compression/lzxpress.py +80 -0
  18. dissect/util/compression/lzxpress_huffman.py +184 -0
  19. dissect/util/compression/sevenbit.py +77 -0
  20. dissect/util/compression/xz.py +112 -0
  21. dissect/util/cpio.py +226 -0
  22. dissect/util/encoding/__init__.py +0 -0
  23. dissect/util/encoding/surrogateescape.py +21 -0
  24. dissect/util/exceptions.py +6 -0
  25. dissect/util/hash/__init__.py +28 -0
  26. dissect/util/hash/crc32.py +55 -0
  27. dissect/util/hash/crc32c.py +60 -0
  28. dissect/util/hash/jenkins.py +102 -0
  29. dissect/util/ldap.py +237 -0
  30. dissect/util/plist.py +156 -0
  31. dissect/util/sid.py +81 -0
  32. dissect/util/stream.py +671 -0
  33. dissect/util/tools/__init__.py +0 -0
  34. dissect/util/tools/dump_nskeyedarchiver.py +61 -0
  35. dissect/util/ts.py +295 -0
  36. dissect/util/xmemoryview.py +117 -0
  37. dissect_util-3.24.dev1.dist-info/METADATA +89 -0
  38. dissect_util-3.24.dev1.dist-info/RECORD +43 -0
  39. dissect_util-3.24.dev1.dist-info/WHEEL +5 -0
  40. dissect_util-3.24.dev1.dist-info/entry_points.txt +2 -0
  41. dissect_util-3.24.dev1.dist-info/licenses/COPYRIGHT +5 -0
  42. dissect_util-3.24.dev1.dist-info/licenses/LICENSE +201 -0
  43. dissect_util-3.24.dev1.dist-info/top_level.txt +1 -0
dissect/util/cpio.py ADDED
@@ -0,0 +1,226 @@
1
+ from __future__ import annotations
2
+
3
+ import stat
4
+ import struct
5
+ import tarfile
6
+ from tarfile import InvalidHeaderError
7
+ from typing import BinaryIO
8
+
9
# Identifiers for the cpio header flavours; these are the values stored on ``TarFile.format``
# and ``CpioInfo.format`` by the reader below.
FORMAT_CPIO_BIN = 10
FORMAT_CPIO_ODC = 11
FORMAT_CPIO_NEWC = 12
FORMAT_CPIO_CRC = 13
FORMAT_CPIO_HPBIN = 16
FORMAT_CPIO_HPODC = 17
FORMAT_CPIO_UNKNOWN = 18

# Header magic values (octal), compared against the parsed first header field.
CPIO_MAGIC_OLD = 0o070707
CPIO_MAGIC_NEW = 0o070701
CPIO_MAGIC_CRC = 0o070702

# Translation from the file type bits of a cpio mode field to the matching tarfile type flag.
TYPE_MAP = {
    stat.S_IFREG: tarfile.REGTYPE,
    stat.S_IFDIR: tarfile.DIRTYPE,
    stat.S_IFIFO: tarfile.FIFOTYPE,
    stat.S_IFLNK: tarfile.SYMTYPE,
    stat.S_IFCHR: tarfile.CHRTYPE,
    stat.S_IFBLK: tarfile.BLKTYPE,
}
29
+
30
+
31
class CpioInfo(tarfile.TarInfo):
    """Custom ``TarInfo`` implementation for reading cpio archives.

    Examples::

        tarfile.open(..., tarinfo=CpioInfo)
        # or
        tarfile.TarFile(..., tarinfo=CpioInfo)

    """

    @classmethod
    def fromtarfile(cls, tarfile: tarfile.TarFile) -> CpioInfo | None:
        """Read and parse the next cpio member header from ``tarfile.fileobj``.

        When ``tarfile.format`` is not already one of the known cpio format identifiers, the format is detected
        from the stream and stored back on ``tarfile.format``.

        Args:
            tarfile: The ``TarFile`` object that is iterating over the archive.

        Returns:
            The parsed member, or ``None`` when the trailer entry was reached.

        Raises:
            InvalidHeaderError: If the cpio format is unknown or the header magic is invalid.
        """
        if tarfile.format not in (
            FORMAT_CPIO_BIN,
            FORMAT_CPIO_ODC,
            FORMAT_CPIO_NEWC,
            FORMAT_CPIO_CRC,
            FORMAT_CPIO_HPBIN,
            FORMAT_CPIO_HPODC,
        ):
            tarfile.format = detect_header(tarfile.fileobj)

        # Every flavour has its own fixed header size
        if tarfile.format in (FORMAT_CPIO_BIN, FORMAT_CPIO_HPBIN):
            buf = tarfile.fileobj.read(26)
        elif tarfile.format in (FORMAT_CPIO_ODC, FORMAT_CPIO_HPODC):
            buf = tarfile.fileobj.read(76)
        elif tarfile.format in (FORMAT_CPIO_NEWC, FORMAT_CPIO_CRC):
            buf = tarfile.fileobj.read(110)
        else:
            raise InvalidHeaderError("Unknown cpio type")

        obj = cls.frombuf(buf, tarfile.format, tarfile.encoding, tarfile.errors)
        obj.format = tarfile.format
        obj.offset = tarfile.fileobj.tell() - len(buf)
        return obj._proc_member(tarfile)

    @classmethod
    def frombuf(cls, buf: bytes, format: int, encoding: str, errors: str) -> CpioInfo:
        """Construct a ``CpioInfo`` from a raw header buffer.

        Args:
            buf: The fixed-size header bytes.
            format: One of the ``FORMAT_CPIO_*`` identifiers describing the header layout.
            encoding: Unused here; accepted for signature compatibility with ``TarInfo.frombuf``.
            errors: Unused here; accepted for signature compatibility with ``TarInfo.frombuf``.

        Raises:
            InvalidHeaderError: If ``format`` is not a known cpio format or the header magic is invalid.
        """
        if format in (FORMAT_CPIO_BIN, FORMAT_CPIO_ODC, FORMAT_CPIO_HPBIN, FORMAT_CPIO_HPODC):
            obj = cls._old_frombuf(buf, format)
        elif format in (FORMAT_CPIO_NEWC, FORMAT_CPIO_CRC):
            obj = cls._new_frombuf(buf, format)
        else:
            # Without this branch an unknown format would surface as an UnboundLocalError below
            raise InvalidHeaderError("Unknown cpio type")

        # Common postprocessing: split the raw mode into a tarfile type flag and permission bits
        ftype = stat.S_IFMT(obj._mode)
        obj.type = TYPE_MAP.get(ftype, ftype)
        obj.mode = stat.S_IMODE(obj._mode)

        return obj

    @classmethod
    def _old_frombuf(cls, buf: bytes, format: int) -> CpioInfo:
        """Parse an old-style (binary or portable ASCII) cpio header.

        Raises:
            InvalidHeaderError: If the header magic is not ``CPIO_MAGIC_OLD``.
        """
        if format in (FORMAT_CPIO_BIN, FORMAT_CPIO_HPBIN):
            values = list(struct.unpack("<13H", buf))
            if values[0] == _swap16(CPIO_MAGIC_OLD):
                # Header was written with the opposite byte order, so swap every 16-bit field
                values = [_swap16(v) for v in values]

            # mtime and size are stored as two 16-bit halves each; merge them so the resulting
            # list has the same layout as the ASCII variant
            mtime = (values.pop(8) << 16) | values.pop(8)
            size = (values.pop(9) << 16) | values.pop(9)
            values.insert(8, mtime)
            values.append(size)
        else:
            values = [int(v, 8) for v in struct.unpack("<6s6s6s6s6s6s6s6s11s6s11s", buf)]

        if values[0] != CPIO_MAGIC_OLD:
            raise InvalidHeaderError(f"Invalid (old) ASCII/binary cpio header magic: {oct(values[0])}")

        obj = cls()
        obj.devmajor = values[1] >> 8
        obj.devminor = values[1] & 0xFF
        obj._mode = values[3]
        obj.uid = values[4]
        obj.gid = values[5]
        obj.mtime = values[8]
        obj.size = values[10]

        # Extra fields
        obj.magic = values[0]
        obj.ino = values[2]
        obj.nlink = values[6]
        obj.rdevmajor = values[7] >> 8
        obj.rdevminor = values[7] & 0xFF
        obj.namesize = values[9]

        # This is a specific case for HP/UX cpio archives, which I'll let this comment from the original source explain:
        # HP/UX cpio creates archives that look just like ordinary archives,
        # but for devices it sets major = 0, minor = 1, and puts the
        # actual major/minor number in the filesize field. See if this
        # is an HP/UX cpio archive, and if so fix it. We have to do this
        # here because process_copy_in() assumes filesize is always 0
        # for devices.
        #
        # The raw ``_mode`` must be inspected here: ``mode`` still holds the ``TarInfo`` default at this point
        # and carries no file type bits, so testing it would never match.
        if (
            stat.S_IFMT(obj._mode) in (stat.S_IFCHR, stat.S_IFBLK, stat.S_IFSOCK, stat.S_IFIFO)
            and obj.size != 0
            and obj.rdevmajor == 0
            and obj.rdevminor == 1
        ):
            obj.rdevmajor = (obj.size >> 8) & 0xFF
            obj.rdevminor = obj.size & 0xFF
            obj.size = 0

        return obj

    @classmethod
    def _new_frombuf(cls, buf: bytes, format: int) -> CpioInfo:
        """Parse a new-style ASCII (newc/crc) cpio header.

        Raises:
            InvalidHeaderError: If the header magic is neither ``CPIO_MAGIC_NEW`` nor ``CPIO_MAGIC_CRC``.
        """
        values = struct.unpack("<6s8s8s8s8s8s8s8s8s8s8s8s8s8s", buf)
        # The magic is parsed as octal, every other field as 8 hexadecimal digits
        values = [int(values[0], 8)] + [int(v, 16) for v in values[1:]]
        if values[0] not in (CPIO_MAGIC_NEW, CPIO_MAGIC_CRC):
            raise InvalidHeaderError(f"Invalid (new) ASCII cpio header magic: {oct(values[0])}")

        obj = cls()
        obj._mode = values[2]
        obj.uid = values[3]
        obj.gid = values[4]
        obj.mtime = values[6]
        obj.size = values[7]
        obj.devmajor = values[8]
        obj.devminor = values[9]
        obj.chksum = values[13]

        # Extra fields
        obj.magic = values[0]
        obj.ino = values[1]
        obj.nlink = values[5]
        obj.rdevmajor = values[10]
        obj.rdevminor = values[11]
        obj.namesize = values[12]

        return obj

    def _proc_member(self, tarfile: tarfile.TarFile) -> CpioInfo | None:
        """Read the member name, compute the data offsets and resolve symlink targets.

        Returns:
            ``self``, or ``None`` when this entry is the ``TRAILER!!!`` end-of-archive marker.
        """
        # namesize counts one trailing byte that is not part of the name itself
        self.name = tarfile.fileobj.read(self.namesize - 1).decode(tarfile.encoding, tarfile.errors)
        if self.name == "TRAILER!!!":
            # The last entry in a cpio file has the special name ``TRAILER!!!``, indicating the end of the archive
            return None

        # Skip the one byte that follows the name, then align to the format's word size
        offset = tarfile.fileobj.tell() + 1
        self.offset_data = self._round_word(offset)
        tarfile.offset = self._round_word(self.offset_data + self.size)

        if self.issym():
            # The link target is stored as the member data
            tarfile.fileobj.seek(self.offset_data)
            self.linkname = tarfile.fileobj.read(self.size).decode(tarfile.encoding, tarfile.errors)
            self.size = 0

        return self

    def _round_word(self, offset: int) -> int:
        """Round ``offset`` up to the alignment used by this member's format (2, 4 or no alignment)."""
        if self.format in (FORMAT_CPIO_BIN, FORMAT_CPIO_HPBIN):
            return (offset + 1) & ~0x01

        if self.format in (FORMAT_CPIO_NEWC, FORMAT_CPIO_CRC):
            return (offset + 3) & ~0x03

        return offset

    def issocket(self) -> bool:
        """Return True if it is a socket."""
        return self.type == stat.S_IFSOCK
191
+
192
+
193
def detect_header(fh: BinaryIO) -> int:
    """Detect a cpio format on a file-like object.

    The first six bytes are peeked at and the stream position is restored afterwards.

    Args:
        fh: The file-like object, positioned at the start of a cpio header.

    Returns:
        One of the ``FORMAT_CPIO_*`` identifiers; ``FORMAT_CPIO_UNKNOWN`` when no magic matches.
    """
    start = fh.tell()
    magic = fh.read(6)
    fh.seek(start)

    if magic == b"070701":
        return FORMAT_CPIO_NEWC

    if magic == b"070707":
        return FORMAT_CPIO_ODC

    if magic == b"070702":
        return FORMAT_CPIO_CRC

    # 0o070707 in little and big endian
    if magic[:2] in (b"\x71\xc7", b"\xc7\x71"):
        return FORMAT_CPIO_BIN

    return FORMAT_CPIO_UNKNOWN
211
+
212
+
213
def _swap16(value: int) -> int:
    """Swap the two bytes of a 16-bit value."""
    low_byte = value & 0xFF
    upper = value >> 8
    return (low_byte << 8) | upper
215
+
216
+
217
def CpioFile(*args, **kwargs) -> tarfile.TarFile:  # noqa: N802
    """Utility wrapper around ``tarfile.TarFile`` to easily open cpio archives.

    All arguments are forwarded to ``tarfile.TarFile``. ``format`` defaults to ``FORMAT_CPIO_UNKNOWN`` and
    ``tarinfo`` is always ``CpioInfo``.
    """
    if "format" not in kwargs:
        kwargs["format"] = FORMAT_CPIO_UNKNOWN
    return tarfile.TarFile(*args, tarinfo=CpioInfo, **kwargs)
221
+
222
+
223
def open(*args, **kwargs) -> tarfile.TarFile:
    """Utility wrapper around ``tarfile.open`` to easily open cpio archives.

    All arguments are forwarded to ``tarfile.open``. ``format`` defaults to ``FORMAT_CPIO_UNKNOWN`` and
    ``tarinfo`` is always ``CpioInfo``.
    """
    if "format" not in kwargs:
        kwargs["format"] = FORMAT_CPIO_UNKNOWN
    return tarfile.open(*args, tarinfo=CpioInfo, **kwargs)
File without changes
@@ -0,0 +1,21 @@
1
+ import codecs
2
+
3
+
4
def error_handler(error: Exception) -> tuple[str, int]:
    """Codec error handler that maps undecodable bytes to lone surrogates.

    Each byte in the failing range is turned into the code point ``0xDC00 + byte``.

    Args:
        error: The exception passed in by the codec machinery.

    Returns:
        A tuple of the replacement string and the position at which decoding should resume.

    Raises:
        Exception: ``error`` itself is re-raised when it is not a ``UnicodeDecodeError`` or when a byte
            in the failing range is below 128.
    """
    if not isinstance(error, UnicodeDecodeError):
        raise error

    source = error.object
    escaped = []
    for position in range(error.start, error.end):
        value = source[position]
        if value < 128:
            raise error
        escaped.append(chr(0xDC00 + value))

    replacement = "".join(escaped)
    return replacement, error.end
16
+
17
+
18
# Register the fallback handler only when no "surrogateescape" handler can be looked up.
try:
    codecs.lookup_error("surrogateescape")
except LookupError:
    codecs.register_error("surrogateescape", error_handler)
@@ -0,0 +1,6 @@
1
class Error(Exception):
    """Base class of the exception hierarchy defined in this module."""
3
+
4
+
5
class CorruptDataError(Error):
    """``Error`` subclass; going by its name, meant for corrupt input data (raised outside this module)."""
@@ -0,0 +1,28 @@
1
+ from dissect.util.hash import crc32c
2
+
3
crc32c_python = crc32c

# Select between the native Rust implementation of crc32c (when available) and the
# pure-Python implementation.
#
# Other projects that do:
#   from dissect.util.hash import crc32c
#
# automatically get one or the other.
#
# The native Rust version is also exposed as dissect.util.hash.crc32c_native (None when it is
# unavailable) and the pure-Python version is always exposed as dissect.util.hash.crc32c_python.
try:
    from dissect.util import _native

    crc32c_native = _native.hash.crc32c
except (ImportError, AttributeError):
    crc32c_native = None
else:
    crc32c = crc32c_native

__all__ = [
    "crc32",
    "crc32c",
    "crc32c_native",
    "crc32c_python",
    "jenkins",
]
@@ -0,0 +1,55 @@
1
+ import zlib
2
+ from functools import lru_cache
3
+
4
+
5
@lru_cache(maxsize=32)
def _table(polynomial: int) -> tuple[int, ...]:
    """Build the 256-entry CRC32 lookup table for a (reversed) polynomial.

    Results are memoized per polynomial.

    Args:
        polynomial: The (reversed) polynomial to use for the CRC32 calculation.
    """
    entries = []
    for index in range(256):
        value = index
        for _ in range(8):
            low_bit_set = (value & 1) != 0
            value >>= 1
            if low_bit_set:
                value ^= polynomial
            value &= 0xFFFFFFFF
        entries.append(value)
    return tuple(entries)
23
+
24
+
25
+ def update(crc: int, data: bytes, polynomial: int = 0xEDB88320, table: tuple[int, ...] | None = None) -> int:
26
+ """Update CRC32 checksum with data.
27
+
28
+ Args:
29
+ crc: The initial value of the checksum.
30
+ data: The data to update the checksum with.
31
+ polynomial: The (reversed) polynomial to use for the CRC32 calculation. Default is 0xEDB88320 (crc32b).
32
+ table: Optional precomputed CRC32 table. If None, a table will be generated using the given polynomial.
33
+ """
34
+ if polynomial == 0xEDB88320 and table is None:
35
+ return zlib.crc32(data, crc)
36
+
37
+ if table is None:
38
+ table = _table(polynomial)
39
+
40
+ crc = crc ^ 0xFFFFFFFF
41
+ for b in data:
42
+ crc = table[(crc ^ b) & 0xFF] ^ ((crc >> 8) & 0xFFFFFFFF)
43
+ return crc ^ 0xFFFFFFFF
44
+
45
+
46
def crc32(data: bytes, value: int = 0, polynomial: int = 0xEDB88320, table: tuple[int, ...] | None = None) -> int:
    """Calculate CRC32 checksum of some data, with an optional initial value and polynomial.

    The result is truncated to 32 bits.

    Args:
        data: The data to calculate the checksum of.
        value: The initial value of the checksum. Default is 0.
        polynomial: The (reversed) polynomial to use for the CRC32 calculation. Default is 0xEDB88320 (crc32b).
        table: Optional precomputed CRC32 table. If None, a table will be generated using the given polynomial.
    """
    checksum = update(value, data, polynomial, table)
    return checksum & 0xFFFFFFFF
@@ -0,0 +1,60 @@
1
+ # fmt: off
2
+ _TABLE = (
3
+ 0x00000000, 0xf26b8303, 0xe13b70f7, 0x1350f3f4, 0xc79a971f, 0x35f1141c, 0x26a1e7e8, 0xd4ca64eb,
4
+ 0x8ad958cf, 0x78b2dbcc, 0x6be22838, 0x9989ab3b, 0x4d43cfd0, 0xbf284cd3, 0xac78bf27, 0x5e133c24,
5
+ 0x105ec76f, 0xe235446c, 0xf165b798, 0x030e349b, 0xd7c45070, 0x25afd373, 0x36ff2087, 0xc494a384,
6
+ 0x9a879fa0, 0x68ec1ca3, 0x7bbcef57, 0x89d76c54, 0x5d1d08bf, 0xaf768bbc, 0xbc267848, 0x4e4dfb4b,
7
+ 0x20bd8ede, 0xd2d60ddd, 0xc186fe29, 0x33ed7d2a, 0xe72719c1, 0x154c9ac2, 0x061c6936, 0xf477ea35,
8
+ 0xaa64d611, 0x580f5512, 0x4b5fa6e6, 0xb93425e5, 0x6dfe410e, 0x9f95c20d, 0x8cc531f9, 0x7eaeb2fa,
9
+ 0x30e349b1, 0xc288cab2, 0xd1d83946, 0x23b3ba45, 0xf779deae, 0x05125dad, 0x1642ae59, 0xe4292d5a,
10
+ 0xba3a117e, 0x4851927d, 0x5b016189, 0xa96ae28a, 0x7da08661, 0x8fcb0562, 0x9c9bf696, 0x6ef07595,
11
+ 0x417b1dbc, 0xb3109ebf, 0xa0406d4b, 0x522bee48, 0x86e18aa3, 0x748a09a0, 0x67dafa54, 0x95b17957,
12
+ 0xcba24573, 0x39c9c670, 0x2a993584, 0xd8f2b687, 0x0c38d26c, 0xfe53516f, 0xed03a29b, 0x1f682198,
13
+ 0x5125dad3, 0xa34e59d0, 0xb01eaa24, 0x42752927, 0x96bf4dcc, 0x64d4cecf, 0x77843d3b, 0x85efbe38,
14
+ 0xdbfc821c, 0x2997011f, 0x3ac7f2eb, 0xc8ac71e8, 0x1c661503, 0xee0d9600, 0xfd5d65f4, 0x0f36e6f7,
15
+ 0x61c69362, 0x93ad1061, 0x80fde395, 0x72966096, 0xa65c047d, 0x5437877e, 0x4767748a, 0xb50cf789,
16
+ 0xeb1fcbad, 0x197448ae, 0x0a24bb5a, 0xf84f3859, 0x2c855cb2, 0xdeeedfb1, 0xcdbe2c45, 0x3fd5af46,
17
+ 0x7198540d, 0x83f3d70e, 0x90a324fa, 0x62c8a7f9, 0xb602c312, 0x44694011, 0x5739b3e5, 0xa55230e6,
18
+ 0xfb410cc2, 0x092a8fc1, 0x1a7a7c35, 0xe811ff36, 0x3cdb9bdd, 0xceb018de, 0xdde0eb2a, 0x2f8b6829,
19
+ 0x82f63b78, 0x709db87b, 0x63cd4b8f, 0x91a6c88c, 0x456cac67, 0xb7072f64, 0xa457dc90, 0x563c5f93,
20
+ 0x082f63b7, 0xfa44e0b4, 0xe9141340, 0x1b7f9043, 0xcfb5f4a8, 0x3dde77ab, 0x2e8e845f, 0xdce5075c,
21
+ 0x92a8fc17, 0x60c37f14, 0x73938ce0, 0x81f80fe3, 0x55326b08, 0xa759e80b, 0xb4091bff, 0x466298fc,
22
+ 0x1871a4d8, 0xea1a27db, 0xf94ad42f, 0x0b21572c, 0xdfeb33c7, 0x2d80b0c4, 0x3ed04330, 0xccbbc033,
23
+ 0xa24bb5a6, 0x502036a5, 0x4370c551, 0xb11b4652, 0x65d122b9, 0x97baa1ba, 0x84ea524e, 0x7681d14d,
24
+ 0x2892ed69, 0xdaf96e6a, 0xc9a99d9e, 0x3bc21e9d, 0xef087a76, 0x1d63f975, 0x0e330a81, 0xfc588982,
25
+ 0xb21572c9, 0x407ef1ca, 0x532e023e, 0xa145813d, 0x758fe5d6, 0x87e466d5, 0x94b49521, 0x66df1622,
26
+ 0x38cc2a06, 0xcaa7a905, 0xd9f75af1, 0x2b9cd9f2, 0xff56bd19, 0x0d3d3e1a, 0x1e6dcdee, 0xec064eed,
27
+ 0xc38d26c4, 0x31e6a5c7, 0x22b65633, 0xd0ddd530, 0x0417b1db, 0xf67c32d8, 0xe52cc12c, 0x1747422f,
28
+ 0x49547e0b, 0xbb3ffd08, 0xa86f0efc, 0x5a048dff, 0x8ecee914, 0x7ca56a17, 0x6ff599e3, 0x9d9e1ae0,
29
+ 0xd3d3e1ab, 0x21b862a8, 0x32e8915c, 0xc083125f, 0x144976b4, 0xe622f5b7, 0xf5720643, 0x07198540,
30
+ 0x590ab964, 0xab613a67, 0xb831c993, 0x4a5a4a90, 0x9e902e7b, 0x6cfbad78, 0x7fab5e8c, 0x8dc0dd8f,
31
+ 0xe330a81a, 0x115b2b19, 0x020bd8ed, 0xf0605bee, 0x24aa3f05, 0xd6c1bc06, 0xc5914ff2, 0x37faccf1,
32
+ 0x69e9f0d5, 0x9b8273d6, 0x88d28022, 0x7ab90321, 0xae7367ca, 0x5c18e4c9, 0x4f48173d, 0xbd23943e,
33
+ 0xf36e6f75, 0x0105ec76, 0x12551f82, 0xe03e9c81, 0x34f4f86a, 0xc69f7b69, 0xd5cf889d, 0x27a40b9e,
34
+ 0x79b737ba, 0x8bdcb4b9, 0x988c474d, 0x6ae7c44e, 0xbe2da0a5, 0x4c4623a6, 0x5f16d052, 0xad7d5351,
35
+ )
36
+ # fmt: on
37
+
38
+
39
def update(crc: int, data: bytes) -> int:
    """Update CRC32C checksum with data.

    Args:
        crc: The initial value of the checksum.
        data: The data to update the checksum with.
    """
    state = crc ^ 0xFFFFFFFF
    for byte in data:
        shifted = (state >> 8) & 0xFFFFFFFF
        state = _TABLE[(state ^ byte) & 0xFF] ^ shifted
    return state ^ 0xFFFFFFFF
51
+
52
+
53
def crc32c(data: bytes, value: int = 0) -> int:
    """Calculate CRC32C checksum of some data, with an optional initial value.

    The result is truncated to 32 bits.

    Args:
        data: The data to calculate the checksum of.
        value: The initial value of the checksum. Default is 0.
    """
    checksum = update(value, data)
    return checksum & 0xFFFFFFFF
@@ -0,0 +1,102 @@
1
+ from struct import unpack
2
+
3
+
4
def _mix64(a: int, b: int, c: int) -> tuple[int, int, int]:
    """Mixes three 64-bit values reversibly.

    Intermediate values are allowed to go negative or grow beyond 64 bits. Subtraction, XOR and left shift
    keep the low 64 bits correct regardless, so only the operands of right shifts are masked, and the results
    are truncated once at the end.

    Args:
        a: First value to mix.
        b: Second value to mix.
        c: Third value to mix.

    Returns:
        The mixed ``(a, b, c)`` values, each normalized to an unsigned 64-bit integer.
    """
    # Implement logical right shift by masking first
    a = (a - b - c) ^ ((c & 0xFFFFFFFFFFFFFFFF) >> 43)
    b = (b - c - a) ^ (a << 9)
    c = (c - a - b) ^ ((b & 0xFFFFFFFFFFFFFFFF) >> 8)
    a = (a - b - c) ^ ((c & 0xFFFFFFFFFFFFFFFF) >> 38)
    b = (b - c - a) ^ (a << 23)
    c = (c - a - b) ^ ((b & 0xFFFFFFFFFFFFFFFF) >> 5)
    a = (a - b - c) ^ ((c & 0xFFFFFFFFFFFFFFFF) >> 35)
    b = (b - c - a) ^ (a << 49)
    c = (c - a - b) ^ ((b & 0xFFFFFFFFFFFFFFFF) >> 11)
    a = (a - b - c) ^ ((c & 0xFFFFFFFFFFFFFFFF) >> 12)
    b = (b - c - a) ^ (a << 18)
    c = (c - a - b) ^ ((b & 0xFFFFFFFFFFFFFFFF) >> 22)

    # Normalize to 64 bits
    return a & 0xFFFFFFFFFFFFFFFF, b & 0xFFFFFFFFFFFFFFFF, c & 0xFFFFFFFFFFFFFFFF
22
+
23
+
24
def lookup8(key: bytes, level: int) -> int:
    """Hashes a variable-length key into a 64-bit value.

    This hash function is used in the ESXi kernel.

    Args:
        key: The bytes to hash.
        level: Value used to seed the ``a`` and ``b`` state words.

    References:
        - http://burtleburtle.net/bob/c/lookup8.c
    """
    a = b = level
    c = 0x9E3779B97F4A7C13  # Golden ratio, arbitrary value
    length = len(key)
    pos = 0

    # Consume the key three little-endian 64-bit words (24 bytes) at a time
    while length - pos >= 24:
        a += int.from_bytes(key[pos : pos + 8], "little")
        b += int.from_bytes(key[pos + 8 : pos + 16], "little")
        c += int.from_bytes(key[pos + 16 : pos + 24], "little")
        a, b, c = _mix64(a, b, c)
        pos += 24

    # Fold in the total length and whatever is left (at most 23 bytes)
    c += length
    for index, byte in enumerate(key[pos:]):
        if index < 8:
            a += byte << (index * 8)
        elif index < 16:
            b += byte << ((index - 8) * 8)
        else:
            # c takes 23 - 8 - 8 = 7 bytes (length is added to LSB)
            c += byte << ((index - 15) * 8)

    _, _, c = _mix64(a, b, c)
    return c
61
+
62
+
63
def lookup8_quads(key: bytes, level: int) -> int:
    """Hashes a key consisting of ``num`` 64-bit integers into a 64-bit value.

    This hash function is used in the ESXi kernel, but unlike :func:`lookup8`, this variant is not compatible with
    any of the original ``lookup8.c`` implementations. The difference between this variant and :func:`lookup8`
    is that in the final step, the value of ``c`` is incremented by the number of quads, not the number
    of bytes in the key. While ``hash2`` in ``lookup8.c`` is also optimized for 64-bit aligned keys,
    (and uses the number of quads as argument for the key size, not bytes) it uses the length of the key
    in bytes to increment ``c`` in the final step.

    Args:
        key: The bytes to hash, unpacked as little-endian 64-bit integers.
        level: Value used to seed the ``a`` and ``b`` state words.

    References:
        - http://burtleburtle.net/bob/c/lookup8.c
        - ``HashFunc_HashQuads``
    """
    num = len(key) // 8
    quads = unpack(f"<{num}Q", key)

    a = b = level
    c = 0x9E3779B97F4A7C13  # Golden ratio, arbitrary value

    # Consume three quads per round for as long as more than two remain
    index = 0
    while num - index > 2:
        a += quads[index]
        b += quads[index + 1]
        c += quads[index + 2]
        a, b, c = _mix64(a, b, c)
        index += 3

    # This is the main difference from lookup8:
    # c is incremented by the number of quads, not the length of the key.
    c += num

    leftover = num - index
    if leftover == 2:
        a += quads[index]
        b += quads[index + 1]
    elif leftover == 1:
        a += quads[index]

    _, _, c = _mix64(a, b, c)
    return c