PyPI - dissect.util - Versions diffs - 3.24.dev4__cp314-cp314t-musllinux_1_2_aarch64.whl - Mend

dissect.util 3.24.dev4__cp314-cp314t-musllinux_1_2_aarch64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (46) hide show

dissect/util/__init__.py +20 -0
dissect/util/_build.py +17 -0
dissect/util/_native/__init__.pyi +3 -0
dissect/util/_native/compression/__init__.pyi +3 -0
dissect/util/_native/compression/lz4.pyi +7 -0
dissect/util/_native/compression/lzo.pyi +3 -0
dissect/util/_native/hash/__init__.py +3 -0
dissect/util/_native/hash/crc32c.py +2 -0
dissect/util/_native.cpython-314t-aarch64-linux-musl.so +0 -0
dissect/util/compression/__init__.py +45 -0
dissect/util/compression/lz4.py +95 -0
dissect/util/compression/lzbitmap.py +130 -0
dissect/util/compression/lzfse.py +467 -0
dissect/util/compression/lznt1.py +92 -0
dissect/util/compression/lzo.py +118 -0
dissect/util/compression/lzvn.py +241 -0
dissect/util/compression/lzxpress.py +80 -0
dissect/util/compression/lzxpress_huffman.py +184 -0
dissect/util/compression/sevenbit.py +77 -0
dissect/util/compression/snappy.py +86 -0
dissect/util/compression/xz.py +112 -0
dissect/util/cpio.py +226 -0
dissect/util/encoding/__init__.py +0 -0
dissect/util/encoding/surrogateescape.py +21 -0
dissect/util/exceptions.py +6 -0
dissect/util/hash/__init__.py +28 -0
dissect/util/hash/crc32.py +55 -0
dissect/util/hash/crc32c.py +60 -0
dissect/util/hash/jenkins.py +102 -0
dissect/util/ldap.py +237 -0
dissect/util/plist.py +156 -0
dissect/util/sid.py +81 -0
dissect/util/stream.py +772 -0
dissect/util/tools/__init__.py +0 -0
dissect/util/tools/dump_nskeyedarchiver.py +61 -0
dissect/util/ts.py +295 -0
dissect/util/xmemoryview.py +117 -0
dissect_util-3.24.dev4.dist-info/METADATA +89 -0
dissect_util-3.24.dev4.dist-info/RECORD +46 -0
dissect_util-3.24.dev4.dist-info/WHEEL +5 -0
dissect_util-3.24.dev4.dist-info/entry_points.txt +2 -0
dissect_util-3.24.dev4.dist-info/licenses/COPYRIGHT +5 -0
dissect_util-3.24.dev4.dist-info/licenses/LICENSE +201 -0
dissect_util-3.24.dev4.dist-info/sboms/auditwheel.cdx.json +1 -0
dissect_util-3.24.dev4.dist-info/top_level.txt +1 -0
dissect_util.libs/libgcc_s-2d945d6c.so.1 +0 -0

dissect/util/compression/xz.py ADDED Viewed

@@ -0,0 +1,112 @@
+import io
+from binascii import crc32
+from typing import BinaryIO
+from dissect.util.stream import OverlayStream
+HEADER_FOOTER_SIZE = 12
+CRC_SIZE = 4
def repair_checksum(fh: BinaryIO) -> BinaryIO:
    """Repair CRC32 checksums for all headers in an XZ stream.

    FortiOS XZ files have (on purpose) corrupt streams which they read using a modified ``xz`` binary.
    The only things changed are the CRC32 checksums, so partially parse the XZ file and fix all of them.

    The input is not modified: every recomputed checksum is added to an ``OverlayStream`` that wraps ``fh``.

    References:
        - https://tukaani.org/xz/xz-file-format-1.1.0.txt
        - https://github.com/Rogdham/python-xz

    Args:
        fh: A file-like object of an LZMA stream to repair.

    Returns:
        The ``OverlayStream`` over ``fh`` with a recomputed CRC32 added for the stream header, the stream
        footer, the index and every block header.

    Raises:
        ValueError: If the header or footer magic does not match, or if the index cannot be parsed.
    """
    # Seeking to the end returns the absolute position, which is the total size of the file
    file_size = fh.seek(0, io.SEEK_END)
    repaired = OverlayStream(fh, file_size)
    fh.seek(0)
    header = fh.read(HEADER_FOOTER_SIZE)
    # Check header magic
    magic = b"\xfd7zXZ\x00"
    if header[: len(magic)] != magic:
        raise ValueError("Not an XZ file")
    # Add correct header CRC32
    # The checksum occupies the last CRC_SIZE bytes of the header and covers the bytes between magic and checksum
    repaired.add(fh.tell() - CRC_SIZE, _crc32(header[len(magic) : HEADER_FOOTER_SIZE - CRC_SIZE]))
    footer_offset = fh.seek(-HEADER_FOOTER_SIZE, io.SEEK_END)
    footer = fh.read(HEADER_FOOTER_SIZE)
    # Check footer magic
    footer_magic = b"YZ"
    if footer[HEADER_FOOTER_SIZE - len(footer_magic) : HEADER_FOOTER_SIZE] != footer_magic:
        raise ValueError("Not an XZ file")
    # Add correct footer CRC32
    # In the footer the checksum comes first and covers everything between it and the trailing magic
    repaired.add(footer_offset, _crc32(footer[CRC_SIZE : HEADER_FOOTER_SIZE - len(footer_magic)]))
    # The footer stores the index size as (size / 4) - 1, so undo that to get the size in bytes
    backward_size = (int.from_bytes(footer[4:8], "little") + 1) * 4
    fh.seek(-HEADER_FOOTER_SIZE - backward_size, io.SEEK_END)
    index = fh.read(backward_size)
    # Add correct index CRC32
    repaired.add(fh.tell() - CRC_SIZE, _crc32(index[:-CRC_SIZE]))
    # Parse the index
    # Skip the first byte (the index indicator in the XZ format), then read the number of records
    isize, num_records = _mbi(index[1:])
    # Keep only the record data: drop the indicator, the record count and the trailing CRC32
    index = index[1 + isize : -4]
    records = []
    for _ in range(num_records):
        if not index:
            raise ValueError("Missing index size")
        isize, unpadded_size = _mbi(index)
        if not unpadded_size:
            raise ValueError("Missing index record unpadded size")
        index = index[isize:]
        if not index:
            raise ValueError("Missing index size")
        isize, uncompressed_size = _mbi(index)
        if not uncompressed_size:
            raise ValueError("Missing index record uncompressed size")
        index = index[isize:]
        records.append((unpadded_size, uncompressed_size))
    # The blocks sit directly in front of the index, so walk back from the index start by the total
    # size of all blocks, each rounded up to a multiple of 4 bytes
    block_start = file_size - HEADER_FOOTER_SIZE - backward_size
    blocks_len = sum((unpadded_size + 3) & ~3 for unpadded_size, _ in records)
    block_start -= blocks_len
    # Iterate over all blocks and add the correct block header CRC32
    for unpadded_size, _ in records:
        fh.seek(block_start)
        # The first byte of a block header encodes the header size as (size / 4) - 1
        block_header = fh.read(1)
        block_header_size = (block_header[0] + 1) * 4
        block_header += fh.read(block_header_size - 1)
        repaired.add(fh.tell() - CRC_SIZE, _crc32(block_header[:-CRC_SIZE]))
        block_start += (unpadded_size + 3) & ~3
    return repaired
def _mbi(data: bytes) -> tuple[int, int]:
    """Decode a multibyte integer from the start of ``data``.

    This is a "varint" style encoding: every byte contributes its 7 least significant bits to the value,
    and a set most significant bit means another byte follows. The least significant group comes first.

    Returns:
        A tuple of the number of bytes consumed and the decoded value.

    Raises:
        ValueError: If ``data`` ends before a byte without the continuation bit is seen.
    """
    result = 0
    shift = 0
    for consumed, octet in enumerate(data, start=1):
        result |= (octet & 0x7F) << shift
        if octet & 0x80 == 0:
            # No continuation bit: this was the final byte of the integer
            return consumed, result
        shift += 7

    raise ValueError("Invalid mbi")
def _crc32(data: bytes) -> bytes:
    """Return the CRC32 of ``data`` as ``CRC_SIZE`` little-endian bytes."""
    checksum = crc32(data)
    return checksum.to_bytes(CRC_SIZE, "little")

dissect/util/cpio.py ADDED Viewed

@@ -0,0 +1,226 @@
+from __future__ import annotations
+import stat
+import struct
+import tarfile
+from tarfile import InvalidHeaderError
+from typing import BinaryIO
# Identifiers for the supported cpio header formats. They are stored in ``TarFile.format`` and decide how
# many header bytes are read and how they are parsed.
FORMAT_CPIO_BIN = 10
FORMAT_CPIO_ODC = 11
FORMAT_CPIO_NEWC = 12
FORMAT_CPIO_CRC = 13
FORMAT_CPIO_HPBIN = 16
FORMAT_CPIO_HPODC = 17
# Used when the format has not been detected (yet) or could not be detected
FORMAT_CPIO_UNKNOWN = 18

# Header magic values: "old" is used by the binary and old ASCII headers, "new" and "crc" by the new ASCII headers
CPIO_MAGIC_OLD = 0o070707
CPIO_MAGIC_NEW = 0o070701
CPIO_MAGIC_CRC = 0o070702

# Translation of the file type bits of a cpio mode field to the matching ``tarfile`` type flag
TYPE_MAP = {
    stat.S_IFREG: tarfile.REGTYPE,
    stat.S_IFDIR: tarfile.DIRTYPE,
    stat.S_IFIFO: tarfile.FIFOTYPE,
    stat.S_IFLNK: tarfile.SYMTYPE,
    stat.S_IFCHR: tarfile.CHRTYPE,
    stat.S_IFBLK: tarfile.BLKTYPE,
}
class CpioInfo(tarfile.TarInfo):
    """Custom ``TarInfo`` implementation for reading cpio archives.

    Examples::

        tarfile.open(..., tarinfo=CpioInfo)
        # or
        tarfile.TarFile(..., tarinfo=CpioInfo)
    """

    @classmethod
    def fromtarfile(cls, tarfile: tarfile.TarFile) -> CpioInfo | None:
        """Read the next cpio header from ``tarfile.fileobj`` and return the parsed member.

        If ``tarfile.format`` is not one of the known cpio formats, it is detected from the stream first.

        Returns:
            The parsed member, or ``None`` when the trailer entry was read.

        Raises:
            InvalidHeaderError: If the cpio format is unknown or the header magic is invalid.
        """
        if tarfile.format not in (
            FORMAT_CPIO_BIN,
            FORMAT_CPIO_ODC,
            FORMAT_CPIO_NEWC,
            FORMAT_CPIO_CRC,
            FORMAT_CPIO_HPBIN,
            FORMAT_CPIO_HPODC,
        ):
            tarfile.format = detect_header(tarfile.fileobj)

        # Every header flavour has its own fixed size
        if tarfile.format in (FORMAT_CPIO_BIN, FORMAT_CPIO_HPBIN):
            buf = tarfile.fileobj.read(26)
        elif tarfile.format in (FORMAT_CPIO_ODC, FORMAT_CPIO_HPODC):
            buf = tarfile.fileobj.read(76)
        elif tarfile.format in (FORMAT_CPIO_NEWC, FORMAT_CPIO_CRC):
            buf = tarfile.fileobj.read(110)
        else:
            raise InvalidHeaderError("Unknown cpio type")

        obj = cls.frombuf(buf, tarfile.format, tarfile.encoding, tarfile.errors)
        obj.format = tarfile.format
        obj.offset = tarfile.fileobj.tell() - len(buf)

        return obj._proc_member(tarfile)

    @classmethod
    def frombuf(cls, buf: bytes, format: int, encoding: str, errors: str) -> CpioInfo:
        """Parse a cpio header from ``buf`` according to ``format``.

        ``encoding`` and ``errors`` are accepted for signature compatibility with ``TarInfo.frombuf`` but are
        not used here; the member name is decoded later in :meth:`_proc_member`.

        Raises:
            InvalidHeaderError: If ``format`` is not a known cpio format or the header magic is invalid.
        """
        if format in (FORMAT_CPIO_BIN, FORMAT_CPIO_ODC, FORMAT_CPIO_HPBIN, FORMAT_CPIO_HPODC):
            obj = cls._old_frombuf(buf, format)
        elif format in (FORMAT_CPIO_NEWC, FORMAT_CPIO_CRC):
            obj = cls._new_frombuf(buf, format)
        else:
            # Without this branch ``obj`` would be unbound and fail with an UnboundLocalError below
            raise InvalidHeaderError("Unknown cpio type")

        # Common postprocessing: split the raw mode into a file type and permission bits
        ftype = stat.S_IFMT(obj._mode)
        obj.type = TYPE_MAP.get(ftype, ftype)
        obj.mode = stat.S_IMODE(obj._mode)

        return obj

    @classmethod
    def _old_frombuf(cls, buf: bytes, format: int) -> CpioInfo:
        """Parse an old style (binary or old ASCII) cpio header.

        Raises:
            InvalidHeaderError: If the header magic is not ``CPIO_MAGIC_OLD``.
        """
        if format in (FORMAT_CPIO_BIN, FORMAT_CPIO_HPBIN):
            values = list(struct.unpack("<13H", buf))
            if values[0] == _swap16(CPIO_MAGIC_OLD):
                # The header was written in the opposite byte order, swap every 16-bit field
                values = [_swap16(v) for v in values]

            # mtime and size are stored as two 16-bit halves each, most significant half first
            mtime = (values.pop(8) << 16) | values.pop(8)
            size = (values.pop(9) << 16) | values.pop(9)
            values.insert(8, mtime)
            values.append(size)
        else:
            # Old ASCII headers store every field as an octal number
            values = [int(v, 8) for v in struct.unpack("<6s6s6s6s6s6s6s6s11s6s11s", buf)]

        if values[0] != CPIO_MAGIC_OLD:
            raise InvalidHeaderError(f"Invalid (old) ASCII/binary cpio header magic: {oct(values[0])}")

        obj = cls()
        obj.devmajor = values[1] >> 8
        obj.devminor = values[1] & 0xFF
        obj._mode = values[3]
        obj.uid = values[4]
        obj.gid = values[5]
        obj.mtime = values[8]
        obj.size = values[10]

        # Extra fields
        obj.magic = values[0]
        obj.ino = values[2]
        obj.nlink = values[6]
        obj.rdevmajor = values[7] >> 8
        obj.rdevminor = values[7] & 0xFF
        obj.namesize = values[9]

        # This is a specific case for HP/UX cpio archives, which I'll let this comment from the original source explain:
        # HP/UX cpio creates archives that look just like ordinary archives,
        # but for devices it sets major = 0, minor = 1, and puts the
        # actual major/minor number in the filesize field.  See if this
        # is an HP/UX cpio archive, and if so fix it.  We have to do this
        # here because process_copy_in() assumes filesize is always 0
        # for devices.
        #
        # The check has to use the raw ``_mode``: ``mode`` still holds the ``TarInfo`` default at this point
        # and only receives the permission bits later on, so it never contains any file type bits.
        if (
            stat.S_IFMT(obj._mode) in (stat.S_IFCHR, stat.S_IFBLK, stat.S_IFSOCK, stat.S_IFIFO)
            and obj.size != 0
            and obj.rdevmajor == 0
            and obj.rdevminor == 1
        ):
            obj.rdevmajor = (obj.size >> 8) & 0xFF
            obj.rdevminor = obj.size & 0xFF
            obj.size = 0

        return obj

    @classmethod
    def _new_frombuf(cls, buf: bytes, format: int) -> CpioInfo:
        """Parse a new style ASCII cpio header (with or without checksum).

        Raises:
            InvalidHeaderError: If the header magic is neither ``CPIO_MAGIC_NEW`` nor ``CPIO_MAGIC_CRC``.
        """
        values = struct.unpack("<6s8s8s8s8s8s8s8s8s8s8s8s8s8s", buf)
        # The magic is octal, all other fields are hexadecimal
        values = [int(values[0], 8)] + [int(v, 16) for v in values[1:]]

        if values[0] not in (CPIO_MAGIC_NEW, CPIO_MAGIC_CRC):
            raise InvalidHeaderError(f"Invalid (new) ASCII cpio header magic: {oct(values[0])}")

        obj = cls()
        obj._mode = values[2]
        obj.uid = values[3]
        obj.gid = values[4]
        obj.mtime = values[6]
        obj.size = values[7]
        obj.devmajor = values[8]
        obj.devminor = values[9]
        obj.chksum = values[13]

        # Extra fields
        obj.magic = values[0]
        obj.ino = values[1]
        obj.nlink = values[5]
        obj.rdevmajor = values[10]
        obj.rdevminor = values[11]
        obj.namesize = values[12]

        return obj

    def _proc_member(self, tarfile: tarfile.TarFile) -> CpioInfo | None:
        """Read the member name that follows the header and compute the data and next member offsets.

        Returns:
            ``self``, or ``None`` if this is the trailer entry that marks the end of the archive.
        """
        # namesize counts one byte more than the name itself (presumably the terminating NUL byte)
        self.name = tarfile.fileobj.read(self.namesize - 1).decode(tarfile.encoding, tarfile.errors)
        if self.name == "TRAILER!!!":
            # The last entry in a cpio file has the special name ``TRAILER!!!``, indicating the end of the archive
            return None

        # Skip the byte after the name that was not read above
        offset = tarfile.fileobj.tell() + 1
        self.offset_data = self._round_word(offset)
        tarfile.offset = self._round_word(self.offset_data + self.size)

        if self.issym():
            # The data of a symlink is its target, so expose it as linkname instead of as file content
            tarfile.fileobj.seek(self.offset_data)
            self.linkname = tarfile.fileobj.read(self.size).decode(tarfile.encoding, tarfile.errors)
            self.size = 0

        return self

    def _round_word(self, offset: int) -> int:
        """Round ``offset`` up to the alignment of this member's format (2, 4 or no alignment)."""
        if self.format in (FORMAT_CPIO_BIN, FORMAT_CPIO_HPBIN):
            return (offset + 1) & ~0x01

        if self.format in (FORMAT_CPIO_NEWC, FORMAT_CPIO_CRC):
            return (offset + 3) & ~0x03

        return offset

    def issocket(self) -> bool:
        """Return True if it is a socket."""
        return self.type == stat.S_IFSOCK
def detect_header(fh: BinaryIO) -> int:
    """Detect a cpio format on a file-like object.

    Peeks at the next 6 bytes of ``fh`` and restores the stream position afterwards.

    Returns:
        One of the ``FORMAT_CPIO_*`` constants, ``FORMAT_CPIO_UNKNOWN`` if no known magic was found.
    """
    start = fh.tell()
    signature = fh.read(6)
    fh.seek(start)

    if signature == b"070701":
        return FORMAT_CPIO_NEWC

    if signature == b"070707":
        return FORMAT_CPIO_ODC

    if signature == b"070702":
        return FORMAT_CPIO_CRC

    if signature[:2] in (b"\x71\xc7", b"\xc7\x71"):
        # 0o070707 in little and big endian
        return FORMAT_CPIO_BIN

    return FORMAT_CPIO_UNKNOWN
def _swap16(value: int) -> int:
    """Swap the two bytes of a 16-bit integer."""
    low = value & 0xFF
    high = value >> 8
    return (low << 8) | high
def CpioFile(*args, **kwargs) -> tarfile.TarFile:  # noqa: N802
    """Utility wrapper around ``tarfile.TarFile`` to easily open cpio archives.

    All arguments are passed on to ``tarfile.TarFile``. Unless the caller provides a ``format``, it
    defaults to ``FORMAT_CPIO_UNKNOWN``.
    """
    if "format" not in kwargs:
        kwargs["format"] = FORMAT_CPIO_UNKNOWN

    return tarfile.TarFile(*args, tarinfo=CpioInfo, **kwargs)
def open(*args, **kwargs) -> tarfile.TarFile:
    """Utility wrapper around ``tarfile.open`` to easily open cpio archives.

    All arguments are passed on to ``tarfile.open``. Unless the caller provides a ``format``, it
    defaults to ``FORMAT_CPIO_UNKNOWN``.
    """
    if "format" not in kwargs:
        kwargs["format"] = FORMAT_CPIO_UNKNOWN

    return tarfile.open(*args, tarinfo=CpioInfo, **kwargs)

dissect/util/encoding/__init__.py ADDED Viewed

File without changes

dissect/util/encoding/surrogateescape.py ADDED Viewed

@@ -0,0 +1,21 @@
+import codecs
def error_handler(error: Exception) -> tuple[str, int]:
    """Codec error handler that maps undecodable bytes to lone surrogates.

    Every byte in the failing range is translated to the code point ``0xDC00 + byte``.

    Returns:
        A tuple of the replacement string and the position at which decoding should continue.

    Raises:
        The given ``error`` itself, if it is not a ``UnicodeDecodeError`` or if the failing range contains
        a byte below 128.
    """
    if not isinstance(error, UnicodeDecodeError):
        raise error

    escaped = []
    position = error.start
    while position < error.end:
        octet = error.object[position]
        if octet < 128:
            raise error
        escaped.append(chr(0xDC00 + octet))
        position += 1

    return "".join(escaped), error.end


# Only register the handler above when the running interpreter does not already provide one under this name
try:
    codecs.lookup_error("surrogateescape")
except LookupError:
    codecs.register_error("surrogateescape", error_handler)

dissect/util/exceptions.py ADDED Viewed

@@ -0,0 +1,6 @@
class Error(Exception):
    """Root of this module's exception hierarchy."""
class CorruptDataError(Error):
    """``Error`` subtype intended for corrupt or malformed data."""

dissect/util/hash/__init__.py ADDED Viewed

@@ -0,0 +1,28 @@
+from dissect.util.hash import crc32c
# Keep a reference to the pure-Python implementation before ``crc32c`` is (possibly) rebound below
crc32c_python = crc32c

# This selects between a native Rust version of crc32c (when available) and our own
# pure-Python implementation.
#
# Doing a:
#  from dissect.util.hash import crc32c
#
# in another project will automatically give you one or the other.
#
# The native Rust version is also available as dissect.util.hash.crc32c_native (when available)
# and the pure Python version is always available as dissect.util.hash.crc32c_python.
try:
    from dissect.util import _native

    crc32c = crc32c_native = _native.hash.crc32c
except (ImportError, AttributeError):
    # Either the native module is missing or it does not provide crc32c; crc32c stays the Python version
    crc32c_native = None

__all__ = [
    "crc32",
    "crc32c",
    "crc32c_native",
    "crc32c_python",
    "jenkins",
]

dissect/util/hash/crc32.py ADDED Viewed

@@ -0,0 +1,55 @@
+import zlib
+from functools import lru_cache
@lru_cache(maxsize=32)
def _table(polynomial: int) -> tuple[int, ...]:
    """Generate a CRC32 table for a given (reversed) polynomial.

    The result is cached per polynomial.

    Args:
        polynomial: The (reversed) polynomial to use for the CRC32 calculation.

    Returns:
        A tuple with the 256 table entries, indexed by byte value.
    """

    def entry(value: int) -> int:
        # Process the 8 bits of the index, least significant bit first
        for _ in range(8):
            carry = value & 1
            value >>= 1
            if carry:
                value ^= polynomial
            value &= 0xFFFFFFFF
        return value

    return tuple(entry(index) for index in range(256))
+def update(crc: int, data: bytes, polynomial: int = 0xEDB88320, table: tuple[int, ...] | None = None) -> int:
+    """Update CRC32 checksum with data.
+    Args:
+        crc: The initial value of the checksum.
+        data: The data to update the checksum with.
+        polynomial: The (reversed) polynomial to use for the CRC32 calculation. Default is 0xEDB88320 (crc32b).
+        table: Optional precomputed CRC32 table. If None, a table will be generated using the given polynomial.
+    """
+    if polynomial == 0xEDB88320 and table is None:
+        return zlib.crc32(data, crc)
+    if table is None:
+        table = _table(polynomial)
+    crc = crc ^ 0xFFFFFFFF
+    for b in data:
+        crc = table[(crc ^ b) & 0xFF] ^ ((crc >> 8) & 0xFFFFFFFF)
+    return crc ^ 0xFFFFFFFF
def crc32(data: bytes, value: int = 0, polynomial: int = 0xEDB88320, table: tuple[int, ...] | None = None) -> int:
    """Calculate CRC32 checksum of some data, with an optional initial value and polynomial.

    Args:
        data: The data to calculate the checksum of.
        value: The initial value of the checksum. Default is 0.
        polynomial: The (reversed) polynomial to use for the CRC32 calculation. Default is 0xEDB88320 (crc32b).
        table: Optional precomputed CRC32 table. If None, a table will be generated using the given polynomial.

    Returns:
        The checksum, truncated to 32 bits.
    """
    checksum = update(value, data, polynomial, table)
    return checksum & 0xFFFFFFFF

dissect/util/hash/crc32c.py ADDED Viewed

@@ -0,0 +1,60 @@
# Precomputed CRC32C lookup table with 256 entries, indexed by ``(crc ^ byte) & 0xFF``.
# Entry 128 is 0x82f63b78, the reversed Castagnoli polynomial.
# fmt: off
_TABLE = (
    0x00000000, 0xf26b8303, 0xe13b70f7, 0x1350f3f4, 0xc79a971f, 0x35f1141c, 0x26a1e7e8, 0xd4ca64eb,
    0x8ad958cf, 0x78b2dbcc, 0x6be22838, 0x9989ab3b, 0x4d43cfd0, 0xbf284cd3, 0xac78bf27, 0x5e133c24,
    0x105ec76f, 0xe235446c, 0xf165b798, 0x030e349b, 0xd7c45070, 0x25afd373, 0x36ff2087, 0xc494a384,
    0x9a879fa0, 0x68ec1ca3, 0x7bbcef57, 0x89d76c54, 0x5d1d08bf, 0xaf768bbc, 0xbc267848, 0x4e4dfb4b,
    0x20bd8ede, 0xd2d60ddd, 0xc186fe29, 0x33ed7d2a, 0xe72719c1, 0x154c9ac2, 0x061c6936, 0xf477ea35,
    0xaa64d611, 0x580f5512, 0x4b5fa6e6, 0xb93425e5, 0x6dfe410e, 0x9f95c20d, 0x8cc531f9, 0x7eaeb2fa,
    0x30e349b1, 0xc288cab2, 0xd1d83946, 0x23b3ba45, 0xf779deae, 0x05125dad, 0x1642ae59, 0xe4292d5a,
    0xba3a117e, 0x4851927d, 0x5b016189, 0xa96ae28a, 0x7da08661, 0x8fcb0562, 0x9c9bf696, 0x6ef07595,
    0x417b1dbc, 0xb3109ebf, 0xa0406d4b, 0x522bee48, 0x86e18aa3, 0x748a09a0, 0x67dafa54, 0x95b17957,
    0xcba24573, 0x39c9c670, 0x2a993584, 0xd8f2b687, 0x0c38d26c, 0xfe53516f, 0xed03a29b, 0x1f682198,
    0x5125dad3, 0xa34e59d0, 0xb01eaa24, 0x42752927, 0x96bf4dcc, 0x64d4cecf, 0x77843d3b, 0x85efbe38,
    0xdbfc821c, 0x2997011f, 0x3ac7f2eb, 0xc8ac71e8, 0x1c661503, 0xee0d9600, 0xfd5d65f4, 0x0f36e6f7,
    0x61c69362, 0x93ad1061, 0x80fde395, 0x72966096, 0xa65c047d, 0x5437877e, 0x4767748a, 0xb50cf789,
    0xeb1fcbad, 0x197448ae, 0x0a24bb5a, 0xf84f3859, 0x2c855cb2, 0xdeeedfb1, 0xcdbe2c45, 0x3fd5af46,
    0x7198540d, 0x83f3d70e, 0x90a324fa, 0x62c8a7f9, 0xb602c312, 0x44694011, 0x5739b3e5, 0xa55230e6,
    0xfb410cc2, 0x092a8fc1, 0x1a7a7c35, 0xe811ff36, 0x3cdb9bdd, 0xceb018de, 0xdde0eb2a, 0x2f8b6829,
    0x82f63b78, 0x709db87b, 0x63cd4b8f, 0x91a6c88c, 0x456cac67, 0xb7072f64, 0xa457dc90, 0x563c5f93,
    0x082f63b7, 0xfa44e0b4, 0xe9141340, 0x1b7f9043, 0xcfb5f4a8, 0x3dde77ab, 0x2e8e845f, 0xdce5075c,
    0x92a8fc17, 0x60c37f14, 0x73938ce0, 0x81f80fe3, 0x55326b08, 0xa759e80b, 0xb4091bff, 0x466298fc,
    0x1871a4d8, 0xea1a27db, 0xf94ad42f, 0x0b21572c, 0xdfeb33c7, 0x2d80b0c4, 0x3ed04330, 0xccbbc033,
    0xa24bb5a6, 0x502036a5, 0x4370c551, 0xb11b4652, 0x65d122b9, 0x97baa1ba, 0x84ea524e, 0x7681d14d,
    0x2892ed69, 0xdaf96e6a, 0xc9a99d9e, 0x3bc21e9d, 0xef087a76, 0x1d63f975, 0x0e330a81, 0xfc588982,
    0xb21572c9, 0x407ef1ca, 0x532e023e, 0xa145813d, 0x758fe5d6, 0x87e466d5, 0x94b49521, 0x66df1622,
    0x38cc2a06, 0xcaa7a905, 0xd9f75af1, 0x2b9cd9f2, 0xff56bd19, 0x0d3d3e1a, 0x1e6dcdee, 0xec064eed,
    0xc38d26c4, 0x31e6a5c7, 0x22b65633, 0xd0ddd530, 0x0417b1db, 0xf67c32d8, 0xe52cc12c, 0x1747422f,
    0x49547e0b, 0xbb3ffd08, 0xa86f0efc, 0x5a048dff, 0x8ecee914, 0x7ca56a17, 0x6ff599e3, 0x9d9e1ae0,
    0xd3d3e1ab, 0x21b862a8, 0x32e8915c, 0xc083125f, 0x144976b4, 0xe622f5b7, 0xf5720643, 0x07198540,
    0x590ab964, 0xab613a67, 0xb831c993, 0x4a5a4a90, 0x9e902e7b, 0x6cfbad78, 0x7fab5e8c, 0x8dc0dd8f,
    0xe330a81a, 0x115b2b19, 0x020bd8ed, 0xf0605bee, 0x24aa3f05, 0xd6c1bc06, 0xc5914ff2, 0x37faccf1,
    0x69e9f0d5, 0x9b8273d6, 0x88d28022, 0x7ab90321, 0xae7367ca, 0x5c18e4c9, 0x4f48173d, 0xbd23943e,
    0xf36e6f75, 0x0105ec76, 0x12551f82, 0xe03e9c81, 0x34f4f86a, 0xc69f7b69, 0xd5cf889d, 0x27a40b9e,
    0x79b737ba, 0x8bdcb4b9, 0x988c474d, 0x6ae7c44e, 0xbe2da0a5, 0x4c4623a6, 0x5f16d052, 0xad7d5351,
)
# fmt: on
def update(crc: int, data: bytes) -> int:
    """Update CRC32C checksum with data.

    Args:
        crc: The initial value of the checksum.
        data: The data to update the checksum with.

    Returns:
        The updated checksum.
    """
    state = crc ^ 0xFFFFFFFF
    for octet in data:
        shifted = (state >> 8) & 0xFFFFFFFF
        state = _TABLE[(state ^ octet) & 0xFF] ^ shifted

    return state ^ 0xFFFFFFFF
def crc32c(data: bytes, value: int = 0) -> int:
    """Calculate CRC32C checksum of some data, with an optional initial value.

    Args:
        data: The data to calculate the checksum of.
        value: The initial value of the checksum. Default is 0.

    Returns:
        The checksum, truncated to 32 bits.
    """
    checksum = update(value, data)
    return checksum & 0xFFFFFFFF

dissect/util/hash/jenkins.py ADDED Viewed

@@ -0,0 +1,102 @@
+from struct import unpack
def _mix64(a: int, b: int, c: int) -> tuple[int, int, int]:
    """Mix three 64-bit values reversibly.

    Python integers are unbounded, so every intermediate value is truncated to 64 bits. That keeps the
    numbers small and turns ``>>`` into a logical shift of a 64-bit value, while subtraction, XOR and
    ``<<`` produce the same low 64 bits with or without the truncation.

    Args:
        a: First value; only its low 64 bits influence the result.
        b: Second value; only its low 64 bits influence the result.
        c: Third value; only its low 64 bits influence the result.

    Returns:
        The mixed ``(a, b, c)``, each normalized to 64 bits.
    """
    mask = 0xFFFFFFFFFFFFFFFF

    # Each round applies: a ^= c >> x, b ^= a << y, c ^= b >> z (after the subtractions)
    for c_shift, a_shift, b_shift in ((43, 9, 8), (38, 23, 5), (35, 49, 11), (12, 18, 22)):
        a = ((a - b - c) ^ ((c & mask) >> c_shift)) & mask
        b = ((b - c - a) ^ (a << a_shift)) & mask
        c = ((c - a - b) ^ (b >> b_shift)) & mask

    return a, b, c
def lookup8(key: bytes, level: int) -> int:
    """Hashes a variable-length key into a 64-bit value.

    This hash function is used in the ESXi kernel.

    References:
        - http://burtleburtle.net/bob/c/lookup8.c

    Args:
        key: The key to hash.
        level: Initial value for two of the three internal state values.

    Returns:
        The 64-bit hash value.
    """
    total = len(key)
    a = b = level
    c = 0x9E3779B97F4A7C13  # Golden ratio, arbitrary value

    # Consume the key in chunks of three little-endian 64-bit words
    pos = 0
    while total - pos >= 24:
        a += int.from_bytes(key[pos : pos + 8], "little")
        b += int.from_bytes(key[pos + 8 : pos + 16], "little")
        c += int.from_bytes(key[pos + 16 : pos + 24], "little")
        a, b, c = _mix64(a, b, c)
        pos += 24

    # Handle the last (at most 23) bytes
    c += total
    tail = key[pos:]
    a += int.from_bytes(tail[:8], "little")
    b += int.from_bytes(tail[8:16], "little")
    # c takes 23 - 8 - 8 = 7 bytes, shifted up by one byte as the length is added to the LSB
    c += int.from_bytes(tail[16:], "little") << 8

    _, _, c = _mix64(a, b, c)
    return c
def lookup8_quads(key: bytes, level: int) -> int:
    """Hashes a key consisting of ``num`` 64-bit integers into a 64-bit value.

    This hash function is used in the ESXi kernel, but unlike :func:`lookup8`, this variant is not compatible with
    any of the original ``lookup8.c`` implementations. The difference between this variant and :func:`lookup8`
    is that in the final step, the value of ``c`` is incremented by the number of quads, not the number
    of bytes in the key. While ``hash2`` in ``lookup8.c`` is also optimized for 64-bit aligned keys,
    (and uses the number of quads as argument for the key size, not bytes) it uses the length of the key
    in bytes to increment ``c`` in the final step.

    References:
        - http://burtleburtle.net/bob/c/lookup8.c
        - ``HashFunc_HashQuads``

    Args:
        key: The key to hash, interpreted as little-endian 64-bit integers.
        level: Initial value for two of the three internal state values.

    Returns:
        The 64-bit hash value.
    """
    count = len(key) // 8
    words = unpack(f"<{count}Q", key)

    a = b = level
    c = 0x9E3779B97F4A7C13  # Golden ratio, arbitrary value

    # Mix in three quads at a time for as long as a full group is available
    idx = 0
    while count - idx > 2:
        a += words[idx]
        b += words[idx + 1]
        c += words[idx + 2]
        a, b, c = _mix64(a, b, c)
        idx += 3

    # This is the main difference from lookup8:
    # c is incremented by the number of quads, not the length of the key.
    c += count

    # At most two quads are left over
    leftover = words[idx:]
    if leftover:
        a += leftover[0]
    if len(leftover) == 2:
        b += leftover[1]

    _, _, c = _mix64(a, b, c)
    return c