dissect.util 3.24.dev1__cp314-cp314t-manylinux_2_28_s390x.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- dissect/util/__init__.py +20 -0
- dissect/util/_build.py +17 -0
- dissect/util/_native/__init__.pyi +3 -0
- dissect/util/_native/compression/__init__.pyi +3 -0
- dissect/util/_native/compression/lz4.pyi +7 -0
- dissect/util/_native/compression/lzo.pyi +3 -0
- dissect/util/_native/hash/__init__.py +3 -0
- dissect/util/_native/hash/crc32c.py +2 -0
- dissect/util/_native.cpython-314t-s390x-linux-gnu.so +0 -0
- dissect/util/compression/__init__.py +45 -0
- dissect/util/compression/lz4.py +95 -0
- dissect/util/compression/lzbitmap.py +130 -0
- dissect/util/compression/lzfse.py +467 -0
- dissect/util/compression/lznt1.py +92 -0
- dissect/util/compression/lzo.py +118 -0
- dissect/util/compression/lzvn.py +241 -0
- dissect/util/compression/lzxpress.py +80 -0
- dissect/util/compression/lzxpress_huffman.py +184 -0
- dissect/util/compression/sevenbit.py +77 -0
- dissect/util/compression/xz.py +112 -0
- dissect/util/cpio.py +226 -0
- dissect/util/encoding/__init__.py +0 -0
- dissect/util/encoding/surrogateescape.py +21 -0
- dissect/util/exceptions.py +6 -0
- dissect/util/hash/__init__.py +28 -0
- dissect/util/hash/crc32.py +55 -0
- dissect/util/hash/crc32c.py +60 -0
- dissect/util/hash/jenkins.py +102 -0
- dissect/util/ldap.py +237 -0
- dissect/util/plist.py +156 -0
- dissect/util/sid.py +81 -0
- dissect/util/stream.py +671 -0
- dissect/util/tools/__init__.py +0 -0
- dissect/util/tools/dump_nskeyedarchiver.py +61 -0
- dissect/util/ts.py +295 -0
- dissect/util/xmemoryview.py +117 -0
- dissect_util-3.24.dev1.dist-info/METADATA +89 -0
- dissect_util-3.24.dev1.dist-info/RECORD +43 -0
- dissect_util-3.24.dev1.dist-info/WHEEL +5 -0
- dissect_util-3.24.dev1.dist-info/entry_points.txt +2 -0
- dissect_util-3.24.dev1.dist-info/licenses/COPYRIGHT +5 -0
- dissect_util-3.24.dev1.dist-info/licenses/LICENSE +201 -0
- dissect_util-3.24.dev1.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,241 @@
|
|
|
1
|
+
# References:
|
|
2
|
+
# - https://github.com/lzfse/lzfse
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import io
|
|
6
|
+
import struct
|
|
7
|
+
from typing import BinaryIO
|
|
8
|
+
|
|
9
|
+
# fmt: off
# Classification tables for the first byte of an LZVN opcode (0-255).
# The bit layouts referenced below are documented in detail in decompress().
# "small distance": LLMMMDDD DDDDDDDD LITERAL
OP_SML_D = (
    0, 1, 2, 3, 4, 5, 8, 9, 10, 11, 12, 13, 16, 17, 18, 19,
    20, 21, 24, 25, 26, 27, 28, 29, 32, 33, 34, 35, 36, 37, 40, 41,
    42, 43, 44, 45, 48, 49, 50, 51, 52, 53, 56, 57, 58, 59, 60, 61,
    64, 65, 66, 67, 68, 69, 72, 73, 74, 75, 76, 77, 80, 81, 82, 83,
    84, 85, 88, 89, 90, 91, 92, 93, 96, 97, 98, 99, 100, 101, 104, 105,
    106, 107, 108, 109, 128, 129, 130, 131, 132, 133, 136, 137, 138, 139, 140, 141,
    144, 145, 146, 147, 148, 149, 152, 153, 154, 155, 156, 157, 192, 193, 194, 195,
    196, 197, 200, 201, 202, 203, 204, 205,
)
# "medium distance": 101LLMMM DDDDDDMM DDDDDDDD LITERAL
OP_MED_D = (
    160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175,
    176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191,
)
# "large distance": LLMMM111 DDDDDDDD DDDDDDDD LITERAL
OP_LRG_D = (
    7, 15, 23, 31, 39, 47, 55, 63, 71, 79, 87, 95, 103, 111, 135, 143,
    151, 159, 199, 207,
)
# "previous distance": LLMMM110, reuses the last match distance
OP_PRE_D = (
    70, 78, 86, 94, 102, 110, 134, 142, 150, 158, 198, 206,
)
# "small match": 1111MMMM, match only, reuses the last match distance
OP_SML_M = (
    241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254, 255,
)
# "large match": 11110000 MMMMMMMM, match only, reuses the last match distance
OP_LRG_M = (
    240,
)
# "small literal": 1110LLLL LITERAL
OP_SML_L = (
    225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239,
)
# "large literal": 11100000 LLLLLLLL LITERAL
OP_LRG_L = (
    224,
)
# No-operation opcodes
OP_NOP = (
    14, 22,
)
# End-of-stream opcode
OP_EOS = (
    6,
)
# Undefined opcodes; decompress() raises ValueError on these
OP_UDEF = (
    30, 38, 46, 54, 62, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121,
    122, 123, 124, 125, 126, 127, 208, 209, 210, 211, 212, 213, 214, 215, 216,
    217, 218, 219, 220, 221, 222, 223,
)
# fmt: on

# Pre-compiled struct for reading little-endian 16-bit values.
_H = struct.Struct("<H")
|
|
57
|
+
|
|
58
|
+
|
|
59
|
+
def decompress(src: bytes | BinaryIO) -> bytes:
    """LZVN decompress from a file-like object or bytes.

    Decompresses until EOF or EOS of the input data.

    Args:
        src: File-like object or bytes to decompress.

    Returns:
        The decompressed data.

    Raises:
        ValueError: If an undefined opcode is encountered or a match
            references data before the start of the output.
    """
    if not hasattr(src, "read"):
        src = io.BytesIO(src)

    # Determine the amount of available input without consuming it.
    offset = src.tell()
    src.seek(0, io.SEEK_END)
    src_size = src.tell() - offset
    src.seek(offset)

    dst = bytearray()

    opc_len = 0

    # ruff: noqa: N806
    # L: pending literal length, M: pending match length, D: match distance.
    # D deliberately persists across opcodes: the "previous distance" style
    # opcodes (pre_d, sml_m, lrg_m) reuse the last decoded distance.
    L = None
    M = None
    D = 0

    while src_size > 0:
        opc = src.read(1)[0]

        if opc in OP_SML_D:
            # "small distance": This opcode has the structure LLMMMDDD DDDDDDDD LITERAL
            # where the length of literal (0-3 bytes) is encoded by the high 2 bits of
            # the first byte. We first extract the literal length so we know how long
            # the opcode is, then check that the source can hold both this opcode and
            # at least one byte of the next (because any valid input stream must be
            # terminated with an eos token).
            opc_len = 2
            L = _extract(opc, 8, 6, 2)
            M = _extract(opc, 8, 3, 3) + 3

            if src_size <= opc_len + L:
                break

            D = _extract(opc, 8, 0, 3) << 8 | src.read(1)[0]

        elif opc in OP_MED_D:
            # "medium distance": This is a minor variant of the "small distance"
            # encoding, where we will now use two extra bytes instead of one to encode
            # the rest of the match length and distance. This allows an extra two bits
            # for the match length, and an extra three bits for the match distance. The
            # full structure of the opcode is 101LLMMM DDDDDDMM DDDDDDDD LITERAL.
            opc_len = 3
            L = _extract(opc, 8, 3, 2)

            if src_size <= opc_len + L:
                break

            (opc23,) = _H.unpack(src.read(2))
            M = (_extract(opc, 8, 0, 3) << 2 | _extract(opc23, 16, 0, 2)) + 3
            D = _extract(opc23, 16, 2, 14)

        elif opc in OP_LRG_D:
            # "large distance": This is another variant of the "small distance"
            # encoding, where we will now use two extra bytes to encode the match
            # distance, which allows distances up to 65535 to be represented. The full
            # structure of the opcode is LLMMM111 DDDDDDDD DDDDDDDD LITERAL.
            opc_len = 3
            L = _extract(opc, 8, 6, 2)
            M = _extract(opc, 8, 3, 3) + 3

            if src_size <= opc_len + L:
                break

            (D,) = _H.unpack(src.read(2))

        elif opc in OP_PRE_D:
            # "previous distance": This opcode has the structure LLMMM110, where the
            # length of the literal (0-3 bytes) is encoded by the high 2 bits of the
            # first byte. We first extract the literal length so we know how long
            # the opcode is, then check that the source can hold both this opcode and
            # at least one byte of the next (because any valid input stream must be
            # terminated with an eos token).
            opc_len = 1
            L = _extract(opc, 8, 6, 2)
            M = _extract(opc, 8, 3, 3) + 3

            if src_size <= opc_len + L:
                break

        elif opc in OP_SML_M:
            # "small match": This opcode has no literal, and uses the previous match
            # distance (i.e. it encodes only the match length), in a single byte as
            # 1111MMMM.
            opc_len = 1
            L = None
            M = _extract(opc, 8, 0, 4)

            if src_size <= opc_len:
                break

        elif opc in OP_LRG_M:
            # "large match": This opcode has no literal, and uses the previous match
            # distance (i.e. it encodes only the match length). It is encoded in two
            # bytes as 11110000 MMMMMMMM. Because matches smaller than 16 bytes can
            # be represented by sml_m, there is an implicit bias of 16 on the match
            # length; the representable values are [16,271].
            opc_len = 2
            L = None

            if src_size <= opc_len:
                break

            M = src.read(1)[0] + 16

        elif opc in OP_SML_L:
            # "small literal": This opcode has no match, and encodes only a literal
            # of length up to 15 bytes. The format is 1110LLLL LITERAL.
            opc_len = 1
            L = _extract(opc, 8, 0, 4)
            M = None

        elif opc in OP_LRG_L:
            # "large literal": This opcode has no match, and encodes only a literal.
            # It is encoded in two bytes as 11100000 LLLLLLLL LITERAL. Because
            # literals smaller than 16 bytes can be represented by sml_l, there is an
            # implicit bias of 16 on the literal length; the representable values
            # are [16,271].
            opc_len = 2

            if src_size <= opc_len:
                break

            L = src.read(1)[0] + 16
            M = None

        elif opc in OP_NOP:
            opc_len = 1
            L = None
            M = None

            if src_size <= opc_len:
                break

        elif opc in OP_EOS:
            opc_len = 8

            if src_size < opc_len:
                break

            # End of stream: stop decoding. Do not fold L into the remaining
            # size here (the old `src_size -= opc_len + L` raised a TypeError
            # when the previous opcode carried no literal and left L as None,
            # e.g. nop/sml_m/lrg_m directly before eos); src_size is unused
            # after the loop anyway.
            break

        elif opc in OP_UDEF:
            raise ValueError("Undefined opcode")

        # Update remaining source size
        src_size -= opc_len

        # Copy literal
        if L is not None:
            src_size -= L
            dst += src.read(L)

        # Match
        if M is not None:
            if len(dst) < D or D == 0:
                raise ValueError("Invalid match distance")

            # The match may overlap the bytes currently being produced, so
            # copy at most D bytes (the part that already exists) per step.
            remaining = M
            while remaining > 0:
                match_size = min(remaining, D)
                dst += dst[-D : (-D + match_size) or None]
                remaining -= match_size

    return bytes(dst)
|
|
236
|
+
|
|
237
|
+
|
|
238
|
+
def _extract(container: int, container_width: int, lsb: int, width: int) -> int:
|
|
239
|
+
if width == container_width:
|
|
240
|
+
return container
|
|
241
|
+
return (container >> lsb) & ((1 << width) - 1)
|
|
@@ -0,0 +1,80 @@
|
|
|
1
|
+
# Reference: [MS-XCA]
|
|
2
|
+
from __future__ import annotations
|
|
3
|
+
|
|
4
|
+
import io
|
|
5
|
+
import struct
|
|
6
|
+
from typing import BinaryIO
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
def decompress(src: bytes | BinaryIO) -> bytes:
    """LZXPRESS decompress from a file-like object or bytes.

    Implements the plain LZ77 variant of [MS-XCA].

    Args:
        src: File-like object or bytes to decompress.

    Returns:
        The decompressed data.
    """
    if not hasattr(src, "read"):
        src = io.BytesIO(src)

    # Determine the amount of available input without consuming it.
    offset = src.tell()
    src.seek(0, io.SEEK_END)
    size = src.tell() - offset
    src.seek(offset)

    dst = bytearray()

    # Flags come 32 at a time from a little-endian dword and are consumed
    # MSB first: flag 0 = literal byte, flag 1 = (offset, length) match.
    buffered_flags = 0
    buffered_flags_count = 0
    # File position of a pending shared "half byte" for extended match
    # lengths (two consecutive extended lengths share one extra byte).
    # 0 means "no pending half byte"; a real position is never 0 because a
    # flag dword always precedes any match token.
    last_length_half_byte = 0

    while src.tell() - offset < size:
        if buffered_flags_count == 0:
            buffered_flags = struct.unpack("<I", src.read(4))[0]
            buffered_flags_count = 32

        buffered_flags_count -= 1
        if buffered_flags & (1 << buffered_flags_count) == 0:
            # Literal: copy one byte verbatim.
            dst.append(ord(src.read(1)))
        else:
            # The final flag bit may coincide with end of input; treat as EOF.
            if src.tell() - offset == size:
                break

            # Match token: offset in the high 13 bits, length in the low 3.
            match = struct.unpack("<H", src.read(2))[0]
            match_offset, match_length = divmod(match, 8)
            match_offset += 1

            if match_length == 7:
                # Length escapes to a shared nibble; alternate between
                # allocating a new shared byte (low nibble) and re-reading
                # the previously stored one (high nibble).
                if last_length_half_byte == 0:
                    last_length_half_byte = src.tell()
                    match_length = ord(src.read(1)) % 16
                else:
                    rewind = src.tell()
                    src.seek(last_length_half_byte)
                    match_length = ord(src.read(1)) // 16
                    src.seek(rewind)
                    last_length_half_byte = 0

                if match_length == 15:
                    # Further escapes: one byte, then two bytes, then four
                    # bytes, each tagged by the previous encoding's maximum.
                    match_length = ord(src.read(1))
                    if match_length == 255:
                        match_length = struct.unpack("<H", src.read(2))[0]
                        if match_length == 0:
                            match_length = struct.unpack("<I", src.read(4))[0]

                        if match_length < 15 + 7:
                            raise ValueError("wrong match length")

                        # The wide encodings store the absolute length, so
                        # undo the biases that get re-added below.
                        match_length -= 15 + 7
                    match_length += 15
                match_length += 7
            match_length += 3

            # Copy the match; it may overlap the bytes being produced, so
            # copy at most match_offset bytes (the existing part) per step.
            remaining = match_length
            while remaining > 0:
                match_size = min(remaining, match_offset)
                dst += dst[-match_offset : (-match_offset + match_size) or None]
                remaining -= match_size

    return bytes(dst)
|
|
@@ -0,0 +1,184 @@
|
|
|
1
|
+
# https://docs.microsoft.com/en-us/openspecs/windows_protocols/ms-frs2/8cb5bae9-edf3-4833-9f0a-9d7e24218d3d
|
|
2
|
+
# https://winprotocoldoc.blob.core.windows.net/productionwindowsarchives/MS-XCA/[MS-XCA].pdf
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import io
|
|
6
|
+
import struct
|
|
7
|
+
from typing import BinaryIO, NamedTuple
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
class Symbol(NamedTuple):
    """A (code length, symbol value) pair for the Huffman code tree.

    Tuple order matters: sorting a list of these orders primarily by code
    length, then by symbol value, which is the canonical Huffman order used
    by ``_build_tree``.
    """

    length: int
    symbol: int
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
def _read_16_bit(fh: BinaryIO) -> int:
|
|
16
|
+
return struct.unpack("<H", fh.read(2).rjust(2, b"\x00"))[0]
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
class Node:
    """A node in the decoded Huffman tree.

    Leaves carry the decoded symbol; internal nodes only route on bits via
    ``children``. Nodes are pool-allocated by ``_build_tree`` and wired up
    by ``_add_leaf``.
    """

    __slots__ = ("children", "is_leaf", "symbol")

    def __init__(self, symbol: Symbol | None = None, is_leaf: bool = False):
        self.symbol = symbol
        self.is_leaf = is_leaf
        # children[0] is followed on a 0 bit, children[1] on a 1 bit.
        self.children = [None, None]
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
def _add_leaf(nodes: list[Node], idx: int, mask: int, bits: int) -> int:
    """Insert ``nodes[idx]`` as a leaf at code ``mask`` of length ``bits``.

    Walks from the root, allocating missing internal nodes from the pool
    (starting at ``nodes[idx + 1]``) along the way.

    Returns:
        The pool index of the next unused node.
    """
    node = nodes[0]  # The root of the tree.
    i = idx + 1  # Next free node in the pool.

    # Follow the code bits from most to least significant, stopping one
    # level above the leaf position.
    while bits > 1:
        bits -= 1
        childidx = (mask >> bits) & 1
        if node.children[childidx] is None:
            # Allocate an internal node for this path on demand.
            node.children[childidx] = nodes[i]
            nodes[i].is_leaf = False
            i += 1
        node = node.children[childidx]

    # Attach the leaf on the last bit of the code.
    node.children[mask & 1] = nodes[idx]
    return i
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
def _build_tree(buf: bytes) -> Node:
    """Build the canonical Huffman decoding tree from a 256 byte length table.

    Each of the 512 symbols has a 4-bit code length (0 = symbol unused),
    packed two per byte with the low nibble first. Symbols are sorted by
    (length, symbol) and assigned consecutive canonical codes.

    Raises:
        ValueError: If ``buf`` is not exactly 256 bytes.
    """
    if len(buf) != 256:
        raise ValueError("Not enough data for Huffman code tree")

    # Pre-allocated node pool (leaves plus internal nodes).
    nodes = [Node() for _ in range(1024)]
    symbols: list[Symbol] = []

    # Unpack the two 4-bit code lengths carried by every table byte.
    for i, c in enumerate(buf):
        symbols.append(Symbol(c & 0x0F, i * 2))
        symbols.append(Symbol((c >> 4) & 0x0F, i * 2 + 1))

    # Canonical order: by code length first, then by symbol value.
    symbols = sorted(symbols)

    # Skip symbols with length 0; they do not occur in the stream.
    symbol_index_start = 0
    for s in symbols:
        if s.length > 0:
            break
        symbol_index_start += 1

    # mask holds the canonical code being assigned, bits its current length.
    mask = 0
    bits = 1

    root = nodes[0]

    tree_index = 1
    for symbol_index in range(symbol_index_start, 512):
        s = symbols[symbol_index]

        # nodes[tree_index] becomes the leaf for this symbol.
        node = nodes[tree_index]
        node.symbol = s.symbol
        node.is_leaf = True

        # Moving to a longer code length left-shifts the running code.
        mask = (mask << s.length - bits) & 0xFFFFFFFF
        bits = s.length

        tree_index = _add_leaf(nodes, tree_index, mask, bits)
        mask += 1

    return root
|
|
84
|
+
|
|
85
|
+
|
|
86
|
+
class BitString:
    """A 32-bit lookahead window over the compressed bit stream.

    Bits are consumed most significant first; the window is refilled in
    16-bit little-endian words read via ``_read_16_bit``.
    """

    def __init__(self):
        # Backing file-like object; set by init().
        self.source = None
        # The 32-bit lookahead window (upcoming bits in the high positions).
        self.mask = 0
        # Number of valid bits currently in the window.
        self.bits = 0

    @property
    def index(self) -> int:
        # Current byte position in the underlying stream.
        return self.source.tell()

    def init(self, fh: BinaryIO) -> None:
        """Prime the window with the first two 16-bit words from ``fh``."""
        self.mask = (_read_16_bit(fh) << 16) + _read_16_bit(fh)
        self.bits = 32
        self.source = fh

    def read(self, n: int) -> bytes:
        """Read ``n`` raw bytes directly from the underlying stream."""
        return self.source.read(n)

    def lookup(self, n: int) -> int:
        """Peek at the next ``n`` bits without consuming them."""
        if n == 0:
            return 0

        return self.mask >> (32 - n)

    def skip(self, n: int) -> None:
        """Consume ``n`` bits, refilling when fewer than 16 bits remain."""
        self.mask = (self.mask << n) & 0xFFFFFFFF
        self.bits -= n
        if self.bits < 16:
            self.mask += _read_16_bit(self.source) << (16 - self.bits)
            self.bits += 16

    def decode(self, root: Node) -> Symbol:
        """Walk the Huffman tree bit by bit until a leaf is reached."""
        node = root
        while not node.is_leaf:
            bit = self.lookup(1)
            self.skip(1)
            node = node.children[bit]
        return node.symbol
|
|
124
|
+
|
|
125
|
+
|
|
126
|
+
def decompress(src: bytes | BinaryIO) -> bytes:
    """LZXPRESS decompress from a file-like object or bytes.

    Implements the LZ77+Huffman variant of [MS-XCA]. Decompresses until EOF
    of the input data.

    Args:
        src: File-like object or bytes to decompress.

    Returns:
        The decompressed data.
    """
    if not hasattr(src, "read"):
        src = io.BytesIO(src)

    dst = bytearray()

    # Determine the amount of available input without consuming it.
    start_offset = src.tell()
    src.seek(0, io.SEEK_END)
    size = src.tell() - start_offset
    src.seek(start_offset, io.SEEK_SET)

    bitstring = BitString()

    while src.tell() - start_offset < size:
        # Every chunk starts with a 256 byte table of 4-bit code lengths
        # for the 512 Huffman symbols, followed by the coded bit stream.
        root = _build_tree(src.read(256))
        bitstring.init(src)

        # A chunk decodes to at most 64 KiB of output.
        chunk_size = 0
        while chunk_size < 65536 and src.tell() - start_offset < size:
            symbol = bitstring.decode(root)
            if symbol < 256:
                # Symbols 0-255 are literal byte values.
                dst.append(symbol)
                chunk_size += 1
            else:
                # Symbols 256-511 encode a match: the high 4 bits select the
                # offset range, the low 4 bits hold the (possibly escaped)
                # match length.
                symbol -= 256
                length = symbol & 0x0F
                symbol >>= 4

                offset = (1 << symbol) + bitstring.lookup(symbol)

                if length == 15:
                    # Escaped length: one extra byte, or a full 16-bit value
                    # when that byte pushes it to the escape value 270.
                    length = ord(bitstring.read(1)) + 15

                    if length == 270:
                        length = _read_16_bit(bitstring.source)

                # Consume the offset bits that lookup() only peeked at.
                bitstring.skip(symbol)

                length += 3

                # Copy the match; it may overlap the output being produced,
                # so copy at most `offset` bytes per step.
                remaining = length
                while remaining > 0:
                    match_size = min(remaining, offset)
                    dst += dst[-offset : (-offset + match_size) or None]
                    remaining -= match_size

                chunk_size += length

    return bytes(dst)
|
|
@@ -0,0 +1,77 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from io import BytesIO
|
|
4
|
+
from typing import BinaryIO
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
def compress(src: bytes | BinaryIO) -> bytes:
|
|
8
|
+
"""Sevenbit compress from a file-like object or bytes.
|
|
9
|
+
|
|
10
|
+
Args:
|
|
11
|
+
src: File-like object or bytes to compress.
|
|
12
|
+
|
|
13
|
+
Returns:
|
|
14
|
+
The compressed data.
|
|
15
|
+
"""
|
|
16
|
+
if not hasattr(src, "read"):
|
|
17
|
+
src = BytesIO(src)
|
|
18
|
+
|
|
19
|
+
dst = bytearray()
|
|
20
|
+
|
|
21
|
+
val = 0
|
|
22
|
+
shift = 0
|
|
23
|
+
while True:
|
|
24
|
+
_byte = src.read(1)
|
|
25
|
+
if not len(_byte):
|
|
26
|
+
break
|
|
27
|
+
|
|
28
|
+
val |= (_byte[0] & 0x7F) << shift
|
|
29
|
+
shift += 7
|
|
30
|
+
|
|
31
|
+
if shift >= 8:
|
|
32
|
+
dst.append(val & 0xFF)
|
|
33
|
+
val >>= 8
|
|
34
|
+
shift -= 8
|
|
35
|
+
|
|
36
|
+
if val:
|
|
37
|
+
dst.append(val & 0xFF)
|
|
38
|
+
|
|
39
|
+
return bytes(dst)
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
def decompress(src: bytes | BinaryIO, wide: bool = False) -> bytes:
|
|
43
|
+
"""Sevenbit decompress from a file-like object or bytes.
|
|
44
|
+
|
|
45
|
+
Args:
|
|
46
|
+
src: File-like object or bytes to decompress.
|
|
47
|
+
|
|
48
|
+
Returns:
|
|
49
|
+
The decompressed data.
|
|
50
|
+
"""
|
|
51
|
+
if not hasattr(src, "read"):
|
|
52
|
+
src = BytesIO(src)
|
|
53
|
+
|
|
54
|
+
dst = bytearray()
|
|
55
|
+
|
|
56
|
+
val = 0
|
|
57
|
+
shift = 0
|
|
58
|
+
while True:
|
|
59
|
+
_byte = src.read(1)
|
|
60
|
+
if not len(_byte):
|
|
61
|
+
break
|
|
62
|
+
|
|
63
|
+
val |= _byte[0] << shift
|
|
64
|
+
dst.append(val & 0x7F)
|
|
65
|
+
if wide:
|
|
66
|
+
dst.append(0)
|
|
67
|
+
|
|
68
|
+
val >>= 7
|
|
69
|
+
shift += 1
|
|
70
|
+
if shift == 7:
|
|
71
|
+
dst.append(val & 0x7F)
|
|
72
|
+
if wide:
|
|
73
|
+
dst.append(0)
|
|
74
|
+
val >>= 7
|
|
75
|
+
shift = 0
|
|
76
|
+
|
|
77
|
+
return bytes(dst)
|
|
@@ -0,0 +1,112 @@
|
|
|
1
|
+
import io
|
|
2
|
+
from binascii import crc32
|
|
3
|
+
from typing import BinaryIO
|
|
4
|
+
|
|
5
|
+
from dissect.util.stream import OverlayStream
|
|
6
|
+
|
|
7
|
+
# Size in bytes of both the XZ stream header and the stream footer.
HEADER_FOOTER_SIZE = 12
# Size in bytes of a CRC32 checksum field.
CRC_SIZE = 4
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
def repair_checksum(fh: BinaryIO) -> BinaryIO:
    """Repair CRC32 checksums for all headers in an XZ stream.

    FortiOS XZ files have (on purpose) corrupt streams which they read using a modified ``xz`` binary.
    The only thing changed are the CRC32 checksums, so partially parse the XZ file and fix all of them.

    References:
        - https://tukaani.org/xz/xz-file-format-1.1.0.txt
        - https://github.com/Rogdham/python-xz

    Args:
        fh: A file-like object of an LZMA stream to repair.

    Returns:
        A stream overlaying ``fh`` with the recomputed checksums patched in.

    Raises:
        ValueError: If the header or footer magic is wrong, or the index is
            malformed.
    """
    file_size = fh.seek(0, io.SEEK_END)
    # Patches are written into an overlay, leaving the original file intact.
    repaired = OverlayStream(fh, file_size)
    fh.seek(0)

    header = fh.read(HEADER_FOOTER_SIZE)
    # Check header magic
    magic = b"\xfd7zXZ\x00"
    if header[: len(magic)] != magic:
        raise ValueError("Not an XZ file")

    # Add correct header CRC32 (covers the stream flags after the magic)
    repaired.add(fh.tell() - CRC_SIZE, _crc32(header[len(magic) : HEADER_FOOTER_SIZE - CRC_SIZE]))

    footer_offset = fh.seek(-HEADER_FOOTER_SIZE, io.SEEK_END)
    footer = fh.read(HEADER_FOOTER_SIZE)

    # Check footer magic
    footer_magic = b"YZ"
    if footer[HEADER_FOOTER_SIZE - len(footer_magic) : HEADER_FOOTER_SIZE] != footer_magic:
        raise ValueError("Not an XZ file")

    # Add correct footer CRC32 (covers backward size and stream flags)
    repaired.add(footer_offset, _crc32(footer[CRC_SIZE : HEADER_FOOTER_SIZE - len(footer_magic)]))

    # The "backward size" field gives the index size in 4 byte units minus 1.
    backward_size = (int.from_bytes(footer[4:8], "little") + 1) * 4
    fh.seek(-HEADER_FOOTER_SIZE - backward_size, io.SEEK_END)
    index = fh.read(backward_size)

    # Add correct index CRC32
    repaired.add(fh.tell() - CRC_SIZE, _crc32(index[:-CRC_SIZE]))

    # Parse the index: indicator byte, record count, then per-record
    # (unpadded size, uncompressed size) multibyte-integer pairs.
    isize, num_records = _mbi(index[1:])
    index = index[1 + isize : -4]
    records = []
    for _ in range(num_records):
        if not index:
            raise ValueError("Missing index size")

        isize, unpadded_size = _mbi(index)
        if not unpadded_size:
            raise ValueError("Missing index record unpadded size")

        index = index[isize:]
        if not index:
            raise ValueError("Missing index size")

        isize, uncompressed_size = _mbi(index)
        if not uncompressed_size:
            raise ValueError("Missing index record uncompressed size")

        index = index[isize:]
        records.append((unpadded_size, uncompressed_size))

    # Locate the first block: walk back from the index over all blocks,
    # each padded to a 4 byte boundary.
    block_start = file_size - HEADER_FOOTER_SIZE - backward_size
    blocks_len = sum((unpadded_size + 3) & ~3 for unpadded_size, _ in records)
    block_start -= blocks_len

    # Iterate over all blocks and add the correct block header CRC32
    for unpadded_size, _ in records:
        fh.seek(block_start)

        # First byte encodes the block header size in 4 byte units minus 1.
        block_header = fh.read(1)
        block_header_size = (block_header[0] + 1) * 4
        block_header += fh.read(block_header_size - 1)
        repaired.add(fh.tell() - CRC_SIZE, _crc32(block_header[:-CRC_SIZE]))

        block_start += (unpadded_size + 3) & ~3

    return repaired
|
|
94
|
+
|
|
95
|
+
|
|
96
|
+
def _mbi(data: bytes) -> tuple[int, int]:
|
|
97
|
+
"""Decode a multibyte integer.
|
|
98
|
+
|
|
99
|
+
The encoding is similar to most other "varint" encodings. For each byte, the 7 least significant bits are used for
|
|
100
|
+
the integer value. The most significant bit is used to indicate if the integer continues in the next byte.
|
|
101
|
+
Bytes are ordered in little endian byte order, meaning the least significant byte comes first.
|
|
102
|
+
"""
|
|
103
|
+
value = 0
|
|
104
|
+
for size, byte in enumerate(data):
|
|
105
|
+
value |= (byte & 0x7F) << (size * 7)
|
|
106
|
+
if not byte & 0x80:
|
|
107
|
+
return size + 1, value
|
|
108
|
+
raise ValueError("Invalid mbi")
|
|
109
|
+
|
|
110
|
+
|
|
111
|
+
def _crc32(data: bytes) -> bytes:
    """Return the CRC32 of ``data`` as a little-endian byte string."""
    checksum = crc32(data)
    return checksum.to_bytes(CRC_SIZE, "little")
|