PyPI - dissect.archive - Versions diffs - 1.4.dev1__tar.gz → 1.5.dev2__tar.gz - Mend

dissect.archive 1.4.dev1__tar.gz → 1.5.dev2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (37) hide show

dissect_archive-1.5.dev2/.git-blame-ignore-revs ADDED Viewed

@@ -0,0 +1,6 @@
+# Formatting commits. You can ignore them during git-blame with `--ignore-rev` or `--ignore-revs-file`.
+#
+#   $ git config --add 'blame.ignoreRevsFile' '.git-blame-ignore-revs'
+#
+# Change linter to Ruff (#11)
+f027f8fc9177f9193fe59212b0761d737c36d370

{dissect_archive-1.4.dev1 → dissect_archive-1.5.dev2}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
-Metadata-Version: 2.1
+Metadata-Version: 2.2
 Name: dissect.archive
-Version: 1.4.dev1
+Version: 1.5.dev2
 Summary: A Dissect module implementing parsers for various archive and backup formats
 Author-email: Dissect Team <dissect@fox-it.com>
 License: Affero General Public License v3

{dissect_archive-1.4.dev1 → dissect_archive-1.5.dev2}/dissect/archive/tools/backup.py RENAMED Viewed

@@ -1,3 +1,5 @@
+from __future__ import annotations
 import argparse
 import logging
 import sys
@@ -61,9 +63,9 @@ except ImportError:
             self.draw()
         def draw(self) -> None:
-            infos = []
-            for info in self._info.values():
-                infos.append(f"{info['filename']} {(info['position'] / info['total']) * 100:0.2f}%")
+            infos = [
+                f"{info['filename']} {(info['position'] / info['total']) * 100:0.2f}%" for info in self._info.values()
+            ]
             sys.stderr.write("\r" + " | ".join(infos))
             sys.stderr.flush()

{dissect_archive-1.4.dev1 → dissect_archive-1.5.dev2}/dissect/archive/vbk.py RENAMED Viewed

@@ -5,10 +5,9 @@ from __future__ import annotations
 from functools import cached_property, lru_cache
 from io import BytesIO
-from typing import BinaryIO, Generic, Iterator, TypeVar
+from typing import TYPE_CHECKING, BinaryIO, Generic, TypeVar
 from zlib import crc32
-from dissect.cstruct import Structure
 from dissect.util.compression import lz4
 from dissect.util.crc32c import crc32c
 from dissect.util.stream import AlignedStream
@@ -17,6 +16,11 @@ from dissect.util.xmemoryview import xmemoryview
 from dissect.archive.c_vbk import PAGE_SIZE, c_vbk
 from dissect.archive.exceptions import Error
+if TYPE_CHECKING:
+    from collections.abc import Iterator
+    from dissect.cstruct import Structure
 class VBKError(Error):
     pass
@@ -860,8 +864,7 @@ class MetaVector(Generic[T]):
         offset = (offset * self._entry_size) + 8
         buf = self.vbk.page(page)
-        entry = buf[offset : offset + self._entry_size]
-        return entry
+        return buf[offset : offset + self._entry_size]
     def get(self, idx: int) -> T:
         """Get an entry from the vector.

{dissect_archive-1.4.dev1 → dissect_archive-1.5.dev2}/dissect/archive/vma.py RENAMED Viewed

@@ -1,11 +1,13 @@
 # References:
 # - https://git.proxmox.com/?p=pve-qemu.git;a=blob;f=vma_spec.txt
 # - https://lists.gnu.org/archive/html/qemu-devel/2013-02/msg03667.html
+from __future__ import annotations
 import hashlib
 import struct
 from collections import defaultdict
 from functools import lru_cache
+from typing import TYPE_CHECKING, BinaryIO
 from uuid import UUID
 from dissect.util import ts
@@ -14,6 +16,10 @@ from dissect.util.stream import AlignedStream
 from dissect.archive.c_vma import VMA_EXTENT_MAGIC, VMA_MAGIC, c_vma
 from dissect.archive.exceptions import InvalidHeaderError
+if TYPE_CHECKING:
+    from collections.abc import Iterator
+    from datetime import datetime
 class VMA:
     """Proxmox VMA.
@@ -24,7 +30,7 @@ class VMA:
     The ``vma-extract`` utility can be used for that.
     """
-    def __init__(self, fh):
+    def __init__(self, fh: BinaryIO):
         self.fh = fh
         fh.seek(0)
@@ -46,7 +52,7 @@ class VMA:
         self._blob = memoryview(bytes(header_data))[blob_start:blob_end]
         blob_offset = 1
-        self._blob_data = {}
+        self._blob_data: dict[int, bytes] = {}
         while blob_offset + 2 <= self.header.blob_buffer_size:
             # The header is in big endian, but this is little...
             size = struct.unpack("<H", self._blob[blob_offset : blob_offset + 2])[0]
@@ -54,14 +60,14 @@ class VMA:
                 self._blob_data[blob_offset] = self._blob[blob_offset + 2 : blob_offset + 2 + size].tobytes()
             blob_offset += size + 2
-        self._config = {}
+        self._config: dict[str, bytes] = {}
         for conf_name, conf_data in zip(self.header.config_names, self.header.config_data):
             if (conf_name, conf_data) == (0, 0):
                 continue
             self._config[self.blob_string(conf_name)] = self.blob_data(conf_data)
-        self._devices = {}
+        self._devices: dict[int, Device] = {}
         for dev_id, dev_info in enumerate(self.header.dev_info):
             if dev_id == 0 or dev_info.devname_ptr == 0:
                 continue
@@ -71,33 +77,33 @@ class VMA:
         self._extent = lru_cache(65536)(self._extent)
     @property
-    def creation_time(self):
+    def creation_time(self) -> datetime:
         return ts.from_unix(self.header.ctime)
-    def blob_data(self, offset):
+    def blob_data(self, offset: int) -> bytes:
         if offset not in self._blob_data:
             raise KeyError(f"No blob data for offset {offset}")
         return self._blob_data[offset]
-    def blob_string(self, offset):
+    def blob_string(self, offset: int) -> str:
         return self.blob_data(offset).decode().rstrip("\x00")
-    def config(self, name):
+    def config(self, name: str) -> bytes:
         return self._config[name]
-    def configs(self):
+    def configs(self) -> dict[str, bytes]:
         return self._config
-    def device(self, dev_id):
+    def device(self, dev_id: int) -> Device:
         return self._devices[dev_id]
-    def devices(self):
+    def devices(self) -> list[Device]:
         return list(self._devices.values())
-    def _extent(self, offset):
+    def _extent(self, offset: int) -> Extent:
         return Extent(self.fh, offset)
-    def extents(self):
+    def extents(self) -> Iterator[Extent]:
         offset = self.header.header_size
         while True:
             try:
@@ -111,21 +117,21 @@ class VMA:
 class Device:
-    def __init__(self, vma, dev_id, name, size):
+    def __init__(self, vma: VMA, dev_id: int, name: str, size: int):
         self.vma = vma
         self.id = dev_id
         self.name = name
         self.size = size
-    def __repr__(self):
+    def __repr__(self) -> str:
         return f"<Device id={self.id} name={self.name} size={self.size}>"
-    def open(self):
+    def open(self) -> DeviceDataStream:
         return DeviceDataStream(self)
 class Extent:
-    def __init__(self, fh, offset):
+    def __init__(self, fh: BinaryIO, offset: int):
         self.fh = fh
         self.offset = offset
         self.data_offset = offset + c_vma.VMA_EXTENT_HEADER_SIZE
@@ -175,17 +181,17 @@ class Extent:
             else:
                 block_offset += bin(mask).count("1") * c_vma.VMA_BLOCK_SIZE
-    def __repr__(self):
+    def __repr__(self) -> str:
         return f"<Extent offset=0x{self.offset:x} size=0x{self.size:x}>"
 class DeviceDataStream(AlignedStream):
-    def __init__(self, device):
+    def __init__(self, device: Device):
         self.device = device
         self.vma = device.vma
         super().__init__(size=device.size, align=c_vma.VMA_CLUSTER_SIZE)
-    def _read(self, offset, length):
+    def _read(self, offset: int, length: int) -> bytes:
         cluster_offset = offset // c_vma.VMA_CLUSTER_SIZE
         cluster_count = (length + c_vma.VMA_CLUSTER_SIZE - 1) // c_vma.VMA_CLUSTER_SIZE
         block_count = (length + c_vma.VMA_BLOCK_SIZE - 1) // c_vma.VMA_BLOCK_SIZE
@@ -215,7 +221,7 @@ class DeviceDataStream(AlignedStream):
         return b"".join(result)
-def _iter_clusters(vma, dev_id, cluster, count):
+def _iter_clusters(vma: VMA, dev_id: int, cluster: int, count: int) -> Iterator[tuple[int, int, int]]:
     # Find clusters and starting offsets in all extents
     temp = {}
     end = cluster + count
@@ -251,7 +257,7 @@ def _iter_clusters(vma, dev_id, cluster, count):
         cluster += 1
-def _iter_mask(mask, length):
+def _iter_mask(mask: int, length: int) -> Iterator[tuple[int, int]]:
     # Yield consecutive bitmask values
     current_status = mask & 1
     current_count = 0

{dissect_archive-1.4.dev1 → dissect_archive-1.5.dev2}/dissect/archive/wim.py RENAMED Viewed

@@ -2,9 +2,8 @@ from __future__ import annotations
 import io
 import struct
-from datetime import datetime
 from functools import cached_property, lru_cache
-from typing import BinaryIO, Callable, Iterator, Optional
+from typing import TYPE_CHECKING, BinaryIO, Callable
 from dissect.util.stream import AlignedStream, BufferedStream, RelativeStream
 from dissect.util.ts import wintimestamp
@@ -26,6 +25,10 @@ from dissect.archive.exceptions import (
     NotAReparsePointError,
 )
+if TYPE_CHECKING:
+    from collections.abc import Iterator
+    from datetime import datetime
 DEFAULT_CHUNK_SIZE = 32 * 1024
@@ -78,14 +81,14 @@ class WIM:
 class Resource:
     __slots__ = (
-        "wim",
-        "size",
         "flags",
+        "hash",
         "offset",
         "original_size",
         "part_number",
         "reference_count",
-        "hash",
+        "size",
+        "wim",
     )
     def __init__(
@@ -95,9 +98,9 @@ class Resource:
         flags: RESHDR_FLAG,
         offset: int,
         original_size: int,
-        part_number: Optional[int] = None,
-        reference_count: Optional[int] = None,
-        hash: Optional[bytes] = None,
+        part_number: int | None = None,
+        reference_count: int | None = None,
+        hash: bytes | None = None,
     ):
         self.wim = wim
         self.size = size
@@ -149,8 +152,8 @@ class Resource:
             if decompressor is None:
                 raise NotImplementedError(f"Compression algorithm not yet supported: {compression_flags}")
             return CompressedStream(self.wim.fh, self.offset, self.size, self.original_size, decompressor)
-        else:
-            return RelativeStream(self.wim.fh, self.offset, self.size)
+        return RelativeStream(self.wim.fh, self.offset, self.size)
 class Image:
@@ -165,7 +168,7 @@ class Image:
     def __repr__(self) -> str:
         return "<Image>"
-    def get(self, path: str, entry: Optional[DirectoryEntry] = None) -> DirectoryEntry:
+    def get(self, path: str, entry: DirectoryEntry | None = None) -> DirectoryEntry:
         # Programmatically we will often use the `/` separator, so replace it with the native path separator of NTFS
         # `/` is an illegal character in NTFS filenames, so it's safe to replace
         search_path = path.replace("/", "\\")
@@ -367,8 +370,8 @@ class DirectoryEntry:
         if resource := self.image.wim.resources.get(stream_hash):
             return resource.open()
-        else:
-            raise FileNotFoundError(f"Unable to find resource for directory entry {self}")
+        raise FileNotFoundError(f"Unable to find resource for directory entry {self}")
 class ReparsePoint:
@@ -391,7 +394,7 @@ class ReparsePoint:
         self._buf = fh.read()
     @property
-    def substitute_name(self) -> Optional[str]:
+    def substitute_name(self) -> str | None:
         if not self.info:
             return None
@@ -400,7 +403,7 @@ class ReparsePoint:
         return self._buf[offset : offset + length].decode("utf-16-le")
     @property
-    def print_name(self) -> Optional[str]:
+    def print_name(self) -> str | None:
         if not self.info:
             return None
@@ -446,7 +449,7 @@ class CompressedStream(AlignedStream):
         else:
             entry_size = "Q" if original_size > 0xFFFFFFFF else "I"
             pattern = f"<{num_chunks}{entry_size}"
-            self._chunks = (0,) + struct.unpack(pattern, fh.read(struct.calcsize(pattern)))
+            self._chunks = (0, *struct.unpack(pattern, fh.read(struct.calcsize(pattern))))
         self._data_offset = fh.tell()

{dissect_archive-1.4.dev1 → dissect_archive-1.5.dev2}/dissect/archive/xva.py RENAMED Viewed

@@ -1,10 +1,17 @@
+from __future__ import annotations
 import hashlib
 import tarfile
 from bisect import bisect_right
+from functools import cached_property
+from typing import TYPE_CHECKING, BinaryIO
 from xml.etree import ElementTree
 from dissect.util.stream import AlignedStream
+if TYPE_CHECKING:
+    from collections.abc import Iterator
 BLOCK_SIZE = 1024 * 1024
@@ -14,20 +21,17 @@ class XVA:
     XenCenter export format. Basically a tar file with "blocks" of 1MB.
     """
-    def __init__(self, fh):
+    def __init__(self, fh: BinaryIO):
         # We don't have to cache tar members, tarfile already does that for us
-        self.tar = tarfile.open(fileobj=fh)
-        self._ova = None
-    @property
-    def ova(self):
-        if not self._ova:
-            ova_member = self.tar.getmember("ova.xml")
-            ova_fh = self.tar.extractfile(ova_member)
-            self._ova = ElementTree.fromstring(ova_fh.read())
-        return self._ova
-    def disks(self):
+        self.tar = tarfile.open(fileobj=fh)  # noqa: SIM115
+    @cached_property
+    def ova(self) -> ElementTree.Element:
+        ova_member = self.tar.getmember("ova.xml")
+        ova_fh = self.tar.extractfile(ova_member)
+        return ElementTree.fromstring(ova_fh.read())
+    def disks(self) -> list[str]:
         return [
             el.text
             for el in self.ova.findall(
@@ -35,7 +39,7 @@ class XVA:
             )
         ]
-    def open(self, ref, verify=False):
+    def open(self, ref: str, verify: bool = False) -> XVAStream:
         size = int(
             self.ova.find(f"*//member/name[.='id']/../value[.='{ref}']/../..//name[.='virtual_size']/../value").text
         )
@@ -55,7 +59,7 @@ class XVAStream(AlignedStream):
     data for that current offset. For this reason we build a lookup list with offsets.
     """
-    def __init__(self, xva, ref, size, verify=False):
+    def __init__(self, xva: XVA, ref: str, size: int, verify: bool = False):
         self.xva = xva
         self.ref = ref
         self.verify = verify
@@ -79,7 +83,7 @@ class XVAStream(AlignedStream):
         super().__init__(size, align=BLOCK_SIZE)
-    def _read(self, offset, length):
+    def _read(self, offset: int, length: int) -> bytes:
         result = []
         while length > 0:
@@ -102,8 +106,8 @@ class XVAStream(AlignedStream):
                         and hashlib.sha1(buf).hexdigest() != self.xva.tar.extractfile(checksum_member).read().decode()
                     ):
                         raise ValueError(f"Invalid checksum for {checksum_member.name}")
-                    else:
-                        raise NotImplementedError(f"Unsupported checksum: {checksum_member.name}")
+                    raise NotImplementedError(f"Unsupported checksum: {checksum_member.name}")
                 result.append(buf)
@@ -113,7 +117,7 @@ class XVAStream(AlignedStream):
         return b"".join(result)
-def _iter_block_files(xva, ref):
+def _iter_block_files(xva: XVA, ref: str) -> Iterator[tuple[int, tarfile.TarInfo, tarfile.TarInfo]]:
     member_index = None
     block_member = None
     checksum_member = None

{dissect_archive-1.4.dev1 → dissect_archive-1.5.dev2}/dissect.archive.egg-info/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
-Metadata-Version: 2.1
+Metadata-Version: 2.2
 Name: dissect.archive
-Version: 1.4.dev1
+Version: 1.5.dev2
 Summary: A Dissect module implementing parsers for various archive and backup formats
 Author-email: Dissect Team <dissect@fox-it.com>
 License: Affero General Public License v3

{dissect_archive-1.4.dev1 → dissect_archive-1.5.dev2}/dissect.archive.egg-info/SOURCES.txt RENAMED Viewed

@@ -1,3 +1,4 @@
+.git-blame-ignore-revs
 .gitattributes
 COPYRIGHT
 LICENSE
@@ -22,6 +23,7 @@ dissect/archive/wim.py
 dissect/archive/xva.py
 dissect/archive/tools/__init__.py
 dissect/archive/tools/backup.py
+tests/__init__.py
 tests/conftest.py
 tests/test_exceptions.py
 tests/test_vbk.py

{dissect_archive-1.4.dev1 → dissect_archive-1.5.dev2}/pyproject.toml RENAMED Viewed

@@ -50,13 +50,56 @@ vma-extract = "dissect.archive.tools.backup:main"
 vbk-extract = "dissect.archive.tools.backup:main"
 backup-extract = "dissect.archive.tools.backup:main"
-[tool.black]
+[tool.ruff]
 line-length = 120
+required-version = ">=0.9.0"
-[tool.isort]
-profile = "black"
-known_first_party = ["dissect.archive"]
-known_third_party = ["dissect"]
+[tool.ruff.format]
+docstring-code-format = true
+[tool.ruff.lint]
+select = [
+  "F",
+  "E",
+  "W",
+  "I",
+  "UP",
+  "YTT",
+  "ANN",
+  "B",
+  "C4",
+  "DTZ",
+  "T10",
+  "FA",
+  "ISC",
+  "G",
+  "INP",
+  "PIE",
+  "PYI",
+  "PT",
+  "Q",
+  "RSE",
+  "RET",
+  "SLOT",
+  "SIM",
+  "TID",
+  "TCH",
+  "PTH",
+  "PLC",
+  "TRY",
+  "FLY",
+  "PERF",
+  "FURB",
+  "RUF",
+]
+ignore = ["E203", "B904", "UP024", "ANN002", "ANN003", "ANN204", "ANN401", "SIM105", "TRY003"]
+[tool.ruff.lint.per-file-ignores]
+"tests/docs/**" = ["INP001"]
+[tool.ruff.lint.isort]
+known-first-party = ["dissect.archive"]
+known-third-party = ["dissect"]
 [tool.setuptools]
 license-files = ["LICENSE", "COPYRIGHT"]

dissect_archive-1.5.dev2/tests/__init__.py ADDED Viewed

File without changes

{dissect_archive-1.4.dev1 → dissect_archive-1.5.dev2}/tests/conftest.py RENAMED Viewed

@@ -1,16 +1,21 @@
+from __future__ import annotations
 import gzip
-import os
-from typing import BinaryIO, Iterator
+from pathlib import Path
+from typing import TYPE_CHECKING, BinaryIO
 import pytest
+if TYPE_CHECKING:
+    from collections.abc import Iterator
-def absolute_path(filename) -> str:
-    return os.path.join(os.path.dirname(__file__), filename)
+def absolute_path(filename: str) -> Path:
+    return Path(__file__).parent / filename
 def open_file(name: str, mode: str = "rb") -> Iterator[BinaryIO]:
-    with open(absolute_path(name), mode) as f:
+    with absolute_path(name).open(mode) as f:
         yield f

{dissect_archive-1.4.dev1 → dissect_archive-1.5.dev2}/tests/test_exceptions.py RENAMED Viewed

@@ -4,7 +4,7 @@ from dissect.archive import exceptions
 @pytest.mark.parametrize(
-    "exc, std",
+    ("exc", "std"),
     [
         (exceptions.FileNotFoundError, FileNotFoundError),
         (exceptions.IsADirectoryError, IsADirectoryError),

{dissect_archive-1.4.dev1 → dissect_archive-1.5.dev2}/tests/test_vbk.py RENAMED Viewed

@@ -1,3 +1,5 @@
+from __future__ import annotations
 import hashlib
 import struct
 from typing import BinaryIO
@@ -14,6 +16,7 @@ from dissect.archive.vbk import (
 def test_vbk_version_9(vbk9: BinaryIO) -> None:
+    """test VBK parsing for version 9 files"""
     vbk = VBK(vbk9)
     assert vbk.format_version == 9
@@ -49,6 +52,7 @@ def test_vbk_version_9(vbk9: BinaryIO) -> None:
 def test_vbk_version_13(vbk13: BinaryIO) -> None:
+    """test VBK parsing for version 13 files"""
     vbk = VBK(vbk13)
     assert vbk.format_version == 13

{dissect_archive-1.4.dev1 → dissect_archive-1.5.dev2}/tests/test_vma.py RENAMED Viewed

@@ -1,3 +1,5 @@
+from __future__ import annotations
 import hashlib
 from typing import BinaryIO

{dissect_archive-1.4.dev1 → dissect_archive-1.5.dev2}/tests/test_wim.py RENAMED Viewed

@@ -1,3 +1,5 @@
+from __future__ import annotations
 import hashlib
 from typing import BinaryIO
@@ -11,7 +13,7 @@ def test_wim(basic_wim: BinaryIO) -> None:
     assert len(images) == 1
     image = images[0]
-    assert sorted(list(image.root.listdir().keys())) == ["ads.txt", "dir", "file.txt", "link.txt"]
+    assert sorted(image.root.listdir().keys()) == ["ads.txt", "dir", "file.txt", "link.txt"]
     entry = image.get("file.txt")
     assert entry.is_file()
@@ -42,7 +44,7 @@ def test_wim(basic_wim: BinaryIO) -> None:
     assert not entry.is_file()
     assert entry.is_dir()
     assert not entry.is_reparse_point()
-    assert sorted(list(entry.listdir().keys())) == ["another.txt"]
+    assert sorted(entry.listdir().keys()) == ["another.txt"]
     entry = image.get("dir/another.txt")
     assert entry.is_file()

{dissect_archive-1.4.dev1 → dissect_archive-1.5.dev2}/tox.ini RENAMED Viewed

@@ -32,23 +32,17 @@ commands =
 [testenv:fix]
 package = skip
 deps =
-    black==23.1.0
-    isort==5.11.4
+    ruff==0.9.2
 commands =
-    black dissect tests
-    isort dissect tests
+    ruff format dissect tests
 [testenv:lint]
 package = skip
 deps =
-    black==23.1.0
-    flake8
-    flake8-black
-    flake8-isort
-    isort==5.11.4
+    ruff==0.9.2
     vermin
 commands =
-    flake8 dissect tests
+    ruff check dissect tests
     vermin -t=3.9- --no-tips --lint dissect tests
 [flake8]