dissect.archive 1.4.dev1__tar.gz → 1.5.dev2__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- dissect_archive-1.5.dev2/.git-blame-ignore-revs +6 -0
- {dissect_archive-1.4.dev1 → dissect_archive-1.5.dev2}/PKG-INFO +2 -2
- {dissect_archive-1.4.dev1 → dissect_archive-1.5.dev2}/dissect/archive/tools/backup.py +5 -3
- {dissect_archive-1.4.dev1 → dissect_archive-1.5.dev2}/dissect/archive/vbk.py +7 -4
- {dissect_archive-1.4.dev1 → dissect_archive-1.5.dev2}/dissect/archive/vma.py +28 -22
- {dissect_archive-1.4.dev1 → dissect_archive-1.5.dev2}/dissect/archive/wim.py +19 -16
- {dissect_archive-1.4.dev1 → dissect_archive-1.5.dev2}/dissect/archive/xva.py +23 -19
- {dissect_archive-1.4.dev1 → dissect_archive-1.5.dev2}/dissect.archive.egg-info/PKG-INFO +2 -2
- {dissect_archive-1.4.dev1 → dissect_archive-1.5.dev2}/dissect.archive.egg-info/SOURCES.txt +2 -0
- {dissect_archive-1.4.dev1 → dissect_archive-1.5.dev2}/pyproject.toml +48 -5
- dissect_archive-1.5.dev2/tests/__init__.py +0 -0
- {dissect_archive-1.4.dev1 → dissect_archive-1.5.dev2}/tests/conftest.py +10 -5
- {dissect_archive-1.4.dev1 → dissect_archive-1.5.dev2}/tests/test_exceptions.py +1 -1
- {dissect_archive-1.4.dev1 → dissect_archive-1.5.dev2}/tests/test_vbk.py +4 -0
- {dissect_archive-1.4.dev1 → dissect_archive-1.5.dev2}/tests/test_vma.py +2 -0
- {dissect_archive-1.4.dev1 → dissect_archive-1.5.dev2}/tests/test_wim.py +4 -2
- {dissect_archive-1.4.dev1 → dissect_archive-1.5.dev2}/tox.ini +4 -10
- {dissect_archive-1.4.dev1 → dissect_archive-1.5.dev2}/.gitattributes +0 -0
- {dissect_archive-1.4.dev1 → dissect_archive-1.5.dev2}/COPYRIGHT +0 -0
- {dissect_archive-1.4.dev1 → dissect_archive-1.5.dev2}/LICENSE +0 -0
- {dissect_archive-1.4.dev1 → dissect_archive-1.5.dev2}/MANIFEST.in +0 -0
- {dissect_archive-1.4.dev1 → dissect_archive-1.5.dev2}/README.md +0 -0
- {dissect_archive-1.4.dev1 → dissect_archive-1.5.dev2}/dissect/archive/__init__.py +0 -0
- {dissect_archive-1.4.dev1 → dissect_archive-1.5.dev2}/dissect/archive/c_vbk.py +0 -0
- {dissect_archive-1.4.dev1 → dissect_archive-1.5.dev2}/dissect/archive/c_vma.py +0 -0
- {dissect_archive-1.4.dev1 → dissect_archive-1.5.dev2}/dissect/archive/c_wim.py +0 -0
- {dissect_archive-1.4.dev1 → dissect_archive-1.5.dev2}/dissect/archive/exceptions.py +0 -0
- {dissect_archive-1.4.dev1 → dissect_archive-1.5.dev2}/dissect/archive/tools/__init__.py +0 -0
- {dissect_archive-1.4.dev1 → dissect_archive-1.5.dev2}/dissect.archive.egg-info/dependency_links.txt +0 -0
- {dissect_archive-1.4.dev1 → dissect_archive-1.5.dev2}/dissect.archive.egg-info/entry_points.txt +0 -0
- {dissect_archive-1.4.dev1 → dissect_archive-1.5.dev2}/dissect.archive.egg-info/requires.txt +0 -0
- {dissect_archive-1.4.dev1 → dissect_archive-1.5.dev2}/dissect.archive.egg-info/top_level.txt +0 -0
- {dissect_archive-1.4.dev1 → dissect_archive-1.5.dev2}/setup.cfg +0 -0
- {dissect_archive-1.4.dev1 → dissect_archive-1.5.dev2}/tests/data/basic.wim.gz +0 -0
- {dissect_archive-1.4.dev1 → dissect_archive-1.5.dev2}/tests/data/test.vma.gz +0 -0
- {dissect_archive-1.4.dev1 → dissect_archive-1.5.dev2}/tests/data/test13.vbk.gz +0 -0
- {dissect_archive-1.4.dev1 → dissect_archive-1.5.dev2}/tests/data/test9.vbk.gz +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
|
-
Metadata-Version: 2.
|
|
1
|
+
Metadata-Version: 2.2
|
|
2
2
|
Name: dissect.archive
|
|
3
|
-
Version: 1.4.dev1
|
|
3
|
+
Version: 1.5.dev2
|
|
4
4
|
Summary: A Dissect module implementing parsers for various archive and backup formats
|
|
5
5
|
Author-email: Dissect Team <dissect@fox-it.com>
|
|
6
6
|
License: Affero General Public License v3
|
|
@@ -1,3 +1,5 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
1
3
|
import argparse
|
|
2
4
|
import logging
|
|
3
5
|
import sys
|
|
@@ -61,9 +63,9 @@ except ImportError:
|
|
|
61
63
|
self.draw()
|
|
62
64
|
|
|
63
65
|
def draw(self) -> None:
|
|
64
|
-
infos = [
|
|
65
|
-
|
|
66
|
-
|
|
66
|
+
infos = [
|
|
67
|
+
f"{info['filename']} {(info['position'] / info['total']) * 100:0.2f}%" for info in self._info.values()
|
|
68
|
+
]
|
|
67
69
|
sys.stderr.write("\r" + " | ".join(infos))
|
|
68
70
|
sys.stderr.flush()
|
|
69
71
|
|
|
@@ -5,10 +5,9 @@ from __future__ import annotations
|
|
|
5
5
|
|
|
6
6
|
from functools import cached_property, lru_cache
|
|
7
7
|
from io import BytesIO
|
|
8
|
-
from typing import BinaryIO, Generic,
|
|
8
|
+
from typing import TYPE_CHECKING, BinaryIO, Generic, TypeVar
|
|
9
9
|
from zlib import crc32
|
|
10
10
|
|
|
11
|
-
from dissect.cstruct import Structure
|
|
12
11
|
from dissect.util.compression import lz4
|
|
13
12
|
from dissect.util.crc32c import crc32c
|
|
14
13
|
from dissect.util.stream import AlignedStream
|
|
@@ -17,6 +16,11 @@ from dissect.util.xmemoryview import xmemoryview
|
|
|
17
16
|
from dissect.archive.c_vbk import PAGE_SIZE, c_vbk
|
|
18
17
|
from dissect.archive.exceptions import Error
|
|
19
18
|
|
|
19
|
+
if TYPE_CHECKING:
|
|
20
|
+
from collections.abc import Iterator
|
|
21
|
+
|
|
22
|
+
from dissect.cstruct import Structure
|
|
23
|
+
|
|
20
24
|
|
|
21
25
|
class VBKError(Error):
|
|
22
26
|
pass
|
|
@@ -860,8 +864,7 @@ class MetaVector(Generic[T]):
|
|
|
860
864
|
offset = (offset * self._entry_size) + 8
|
|
861
865
|
|
|
862
866
|
buf = self.vbk.page(page)
|
|
863
|
-
|
|
864
|
-
return entry
|
|
867
|
+
return buf[offset : offset + self._entry_size]
|
|
865
868
|
|
|
866
869
|
def get(self, idx: int) -> T:
|
|
867
870
|
"""Get an entry from the vector.
|
|
@@ -1,11 +1,13 @@
|
|
|
1
1
|
# References:
|
|
2
2
|
# - https://git.proxmox.com/?p=pve-qemu.git;a=blob;f=vma_spec.txt
|
|
3
3
|
# - https://lists.gnu.org/archive/html/qemu-devel/2013-02/msg03667.html
|
|
4
|
+
from __future__ import annotations
|
|
4
5
|
|
|
5
6
|
import hashlib
|
|
6
7
|
import struct
|
|
7
8
|
from collections import defaultdict
|
|
8
9
|
from functools import lru_cache
|
|
10
|
+
from typing import TYPE_CHECKING, BinaryIO
|
|
9
11
|
from uuid import UUID
|
|
10
12
|
|
|
11
13
|
from dissect.util import ts
|
|
@@ -14,6 +16,10 @@ from dissect.util.stream import AlignedStream
|
|
|
14
16
|
from dissect.archive.c_vma import VMA_EXTENT_MAGIC, VMA_MAGIC, c_vma
|
|
15
17
|
from dissect.archive.exceptions import InvalidHeaderError
|
|
16
18
|
|
|
19
|
+
if TYPE_CHECKING:
|
|
20
|
+
from collections.abc import Iterator
|
|
21
|
+
from datetime import datetime
|
|
22
|
+
|
|
17
23
|
|
|
18
24
|
class VMA:
|
|
19
25
|
"""Proxmox VMA.
|
|
@@ -24,7 +30,7 @@ class VMA:
|
|
|
24
30
|
The ``vma-extract`` utility can be used for that.
|
|
25
31
|
"""
|
|
26
32
|
|
|
27
|
-
def __init__(self, fh):
|
|
33
|
+
def __init__(self, fh: BinaryIO):
|
|
28
34
|
self.fh = fh
|
|
29
35
|
|
|
30
36
|
fh.seek(0)
|
|
@@ -46,7 +52,7 @@ class VMA:
|
|
|
46
52
|
self._blob = memoryview(bytes(header_data))[blob_start:blob_end]
|
|
47
53
|
|
|
48
54
|
blob_offset = 1
|
|
49
|
-
self._blob_data = {}
|
|
55
|
+
self._blob_data: dict[int, bytes] = {}
|
|
50
56
|
while blob_offset + 2 <= self.header.blob_buffer_size:
|
|
51
57
|
# The header is in big endian, but this is little...
|
|
52
58
|
size = struct.unpack("<H", self._blob[blob_offset : blob_offset + 2])[0]
|
|
@@ -54,14 +60,14 @@ class VMA:
|
|
|
54
60
|
self._blob_data[blob_offset] = self._blob[blob_offset + 2 : blob_offset + 2 + size].tobytes()
|
|
55
61
|
blob_offset += size + 2
|
|
56
62
|
|
|
57
|
-
self._config = {}
|
|
63
|
+
self._config: dict[str, bytes] = {}
|
|
58
64
|
for conf_name, conf_data in zip(self.header.config_names, self.header.config_data):
|
|
59
65
|
if (conf_name, conf_data) == (0, 0):
|
|
60
66
|
continue
|
|
61
67
|
|
|
62
68
|
self._config[self.blob_string(conf_name)] = self.blob_data(conf_data)
|
|
63
69
|
|
|
64
|
-
self._devices = {}
|
|
70
|
+
self._devices: dict[int, Device] = {}
|
|
65
71
|
for dev_id, dev_info in enumerate(self.header.dev_info):
|
|
66
72
|
if dev_id == 0 or dev_info.devname_ptr == 0:
|
|
67
73
|
continue
|
|
@@ -71,33 +77,33 @@ class VMA:
|
|
|
71
77
|
self._extent = lru_cache(65536)(self._extent)
|
|
72
78
|
|
|
73
79
|
@property
|
|
74
|
-
def creation_time(self):
|
|
80
|
+
def creation_time(self) -> datetime:
|
|
75
81
|
return ts.from_unix(self.header.ctime)
|
|
76
82
|
|
|
77
|
-
def blob_data(self, offset):
|
|
83
|
+
def blob_data(self, offset: int) -> bytes:
|
|
78
84
|
if offset not in self._blob_data:
|
|
79
85
|
raise KeyError(f"No blob data for offset {offset}")
|
|
80
86
|
return self._blob_data[offset]
|
|
81
87
|
|
|
82
|
-
def blob_string(self, offset):
|
|
88
|
+
def blob_string(self, offset: int) -> str:
|
|
83
89
|
return self.blob_data(offset).decode().rstrip("\x00")
|
|
84
90
|
|
|
85
|
-
def config(self, name):
|
|
91
|
+
def config(self, name: str) -> bytes:
|
|
86
92
|
return self._config[name]
|
|
87
93
|
|
|
88
|
-
def configs(self):
|
|
94
|
+
def configs(self) -> dict[str, bytes]:
|
|
89
95
|
return self._config
|
|
90
96
|
|
|
91
|
-
def device(self, dev_id):
|
|
97
|
+
def device(self, dev_id: int) -> Device:
|
|
92
98
|
return self._devices[dev_id]
|
|
93
99
|
|
|
94
|
-
def devices(self):
|
|
100
|
+
def devices(self) -> list[Device]:
|
|
95
101
|
return list(self._devices.values())
|
|
96
102
|
|
|
97
|
-
def _extent(self, offset):
|
|
103
|
+
def _extent(self, offset: int) -> Extent:
|
|
98
104
|
return Extent(self.fh, offset)
|
|
99
105
|
|
|
100
|
-
def extents(self):
|
|
106
|
+
def extents(self) -> Iterator[Extent]:
|
|
101
107
|
offset = self.header.header_size
|
|
102
108
|
while True:
|
|
103
109
|
try:
|
|
@@ -111,21 +117,21 @@ class VMA:
|
|
|
111
117
|
|
|
112
118
|
|
|
113
119
|
class Device:
|
|
114
|
-
def __init__(self, vma, dev_id, name, size):
|
|
120
|
+
def __init__(self, vma: VMA, dev_id: int, name: str, size: int):
|
|
115
121
|
self.vma = vma
|
|
116
122
|
self.id = dev_id
|
|
117
123
|
self.name = name
|
|
118
124
|
self.size = size
|
|
119
125
|
|
|
120
|
-
def __repr__(self):
|
|
126
|
+
def __repr__(self) -> str:
|
|
121
127
|
return f"<Device id={self.id} name={self.name} size={self.size}>"
|
|
122
128
|
|
|
123
|
-
def open(self):
|
|
129
|
+
def open(self) -> DeviceDataStream:
|
|
124
130
|
return DeviceDataStream(self)
|
|
125
131
|
|
|
126
132
|
|
|
127
133
|
class Extent:
|
|
128
|
-
def __init__(self, fh, offset):
|
|
134
|
+
def __init__(self, fh: BinaryIO, offset: int):
|
|
129
135
|
self.fh = fh
|
|
130
136
|
self.offset = offset
|
|
131
137
|
self.data_offset = offset + c_vma.VMA_EXTENT_HEADER_SIZE
|
|
@@ -175,17 +181,17 @@ class Extent:
|
|
|
175
181
|
else:
|
|
176
182
|
block_offset += bin(mask).count("1") * c_vma.VMA_BLOCK_SIZE
|
|
177
183
|
|
|
178
|
-
def __repr__(self):
|
|
184
|
+
def __repr__(self) -> str:
|
|
179
185
|
return f"<Extent offset=0x{self.offset:x} size=0x{self.size:x}>"
|
|
180
186
|
|
|
181
187
|
|
|
182
188
|
class DeviceDataStream(AlignedStream):
|
|
183
|
-
def __init__(self, device):
|
|
189
|
+
def __init__(self, device: Device):
|
|
184
190
|
self.device = device
|
|
185
191
|
self.vma = device.vma
|
|
186
192
|
super().__init__(size=device.size, align=c_vma.VMA_CLUSTER_SIZE)
|
|
187
193
|
|
|
188
|
-
def _read(self, offset, length):
|
|
194
|
+
def _read(self, offset: int, length: int) -> bytes:
|
|
189
195
|
cluster_offset = offset // c_vma.VMA_CLUSTER_SIZE
|
|
190
196
|
cluster_count = (length + c_vma.VMA_CLUSTER_SIZE - 1) // c_vma.VMA_CLUSTER_SIZE
|
|
191
197
|
block_count = (length + c_vma.VMA_BLOCK_SIZE - 1) // c_vma.VMA_BLOCK_SIZE
|
|
@@ -215,7 +221,7 @@ class DeviceDataStream(AlignedStream):
|
|
|
215
221
|
return b"".join(result)
|
|
216
222
|
|
|
217
223
|
|
|
218
|
-
def _iter_clusters(vma, dev_id, cluster, count):
|
|
224
|
+
def _iter_clusters(vma: VMA, dev_id: int, cluster: int, count: int) -> Iterator[tuple[int, int, int]]:
|
|
219
225
|
# Find clusters and starting offsets in all extents
|
|
220
226
|
temp = {}
|
|
221
227
|
end = cluster + count
|
|
@@ -251,7 +257,7 @@ def _iter_clusters(vma, dev_id, cluster, count):
|
|
|
251
257
|
cluster += 1
|
|
252
258
|
|
|
253
259
|
|
|
254
|
-
def _iter_mask(mask, length):
|
|
260
|
+
def _iter_mask(mask: int, length: int) -> Iterator[tuple[int, int]]:
|
|
255
261
|
# Yield consecutive bitmask values
|
|
256
262
|
current_status = mask & 1
|
|
257
263
|
current_count = 0
|
|
@@ -2,9 +2,8 @@ from __future__ import annotations
|
|
|
2
2
|
|
|
3
3
|
import io
|
|
4
4
|
import struct
|
|
5
|
-
from datetime import datetime
|
|
6
5
|
from functools import cached_property, lru_cache
|
|
7
|
-
from typing import BinaryIO, Callable
|
|
6
|
+
from typing import TYPE_CHECKING, BinaryIO, Callable
|
|
8
7
|
|
|
9
8
|
from dissect.util.stream import AlignedStream, BufferedStream, RelativeStream
|
|
10
9
|
from dissect.util.ts import wintimestamp
|
|
@@ -26,6 +25,10 @@ from dissect.archive.exceptions import (
|
|
|
26
25
|
NotAReparsePointError,
|
|
27
26
|
)
|
|
28
27
|
|
|
28
|
+
if TYPE_CHECKING:
|
|
29
|
+
from collections.abc import Iterator
|
|
30
|
+
from datetime import datetime
|
|
31
|
+
|
|
29
32
|
DEFAULT_CHUNK_SIZE = 32 * 1024
|
|
30
33
|
|
|
31
34
|
|
|
@@ -78,14 +81,14 @@ class WIM:
|
|
|
78
81
|
|
|
79
82
|
class Resource:
|
|
80
83
|
__slots__ = (
|
|
81
|
-
"wim",
|
|
82
|
-
"size",
|
|
83
84
|
"flags",
|
|
85
|
+
"hash",
|
|
84
86
|
"offset",
|
|
85
87
|
"original_size",
|
|
86
88
|
"part_number",
|
|
87
89
|
"reference_count",
|
|
88
|
-
"
|
|
90
|
+
"size",
|
|
91
|
+
"wim",
|
|
89
92
|
)
|
|
90
93
|
|
|
91
94
|
def __init__(
|
|
@@ -95,9 +98,9 @@ class Resource:
|
|
|
95
98
|
flags: RESHDR_FLAG,
|
|
96
99
|
offset: int,
|
|
97
100
|
original_size: int,
|
|
98
|
-
part_number:
|
|
99
|
-
reference_count:
|
|
100
|
-
hash:
|
|
101
|
+
part_number: int | None = None,
|
|
102
|
+
reference_count: int | None = None,
|
|
103
|
+
hash: bytes | None = None,
|
|
101
104
|
):
|
|
102
105
|
self.wim = wim
|
|
103
106
|
self.size = size
|
|
@@ -149,8 +152,8 @@ class Resource:
|
|
|
149
152
|
if decompressor is None:
|
|
150
153
|
raise NotImplementedError(f"Compression algorithm not yet supported: {compression_flags}")
|
|
151
154
|
return CompressedStream(self.wim.fh, self.offset, self.size, self.original_size, decompressor)
|
|
152
|
-
|
|
153
|
-
|
|
155
|
+
|
|
156
|
+
return RelativeStream(self.wim.fh, self.offset, self.size)
|
|
154
157
|
|
|
155
158
|
|
|
156
159
|
class Image:
|
|
@@ -165,7 +168,7 @@ class Image:
|
|
|
165
168
|
def __repr__(self) -> str:
|
|
166
169
|
return "<Image>"
|
|
167
170
|
|
|
168
|
-
def get(self, path: str, entry:
|
|
171
|
+
def get(self, path: str, entry: DirectoryEntry | None = None) -> DirectoryEntry:
|
|
169
172
|
# Programmatically we will often use the `/` separator, so replace it with the native path separator of NTFS
|
|
170
173
|
# `/` is an illegal character in NTFS filenames, so it's safe to replace
|
|
171
174
|
search_path = path.replace("/", "\\")
|
|
@@ -367,8 +370,8 @@ class DirectoryEntry:
|
|
|
367
370
|
|
|
368
371
|
if resource := self.image.wim.resources.get(stream_hash):
|
|
369
372
|
return resource.open()
|
|
370
|
-
|
|
371
|
-
|
|
373
|
+
|
|
374
|
+
raise FileNotFoundError(f"Unable to find resource for directory entry {self}")
|
|
372
375
|
|
|
373
376
|
|
|
374
377
|
class ReparsePoint:
|
|
@@ -391,7 +394,7 @@ class ReparsePoint:
|
|
|
391
394
|
self._buf = fh.read()
|
|
392
395
|
|
|
393
396
|
@property
|
|
394
|
-
def substitute_name(self) ->
|
|
397
|
+
def substitute_name(self) -> str | None:
|
|
395
398
|
if not self.info:
|
|
396
399
|
return None
|
|
397
400
|
|
|
@@ -400,7 +403,7 @@ class ReparsePoint:
|
|
|
400
403
|
return self._buf[offset : offset + length].decode("utf-16-le")
|
|
401
404
|
|
|
402
405
|
@property
|
|
403
|
-
def print_name(self) ->
|
|
406
|
+
def print_name(self) -> str | None:
|
|
404
407
|
if not self.info:
|
|
405
408
|
return None
|
|
406
409
|
|
|
@@ -446,7 +449,7 @@ class CompressedStream(AlignedStream):
|
|
|
446
449
|
else:
|
|
447
450
|
entry_size = "Q" if original_size > 0xFFFFFFFF else "I"
|
|
448
451
|
pattern = f"<{num_chunks}{entry_size}"
|
|
449
|
-
self._chunks = (0,
|
|
452
|
+
self._chunks = (0, *struct.unpack(pattern, fh.read(struct.calcsize(pattern))))
|
|
450
453
|
|
|
451
454
|
self._data_offset = fh.tell()
|
|
452
455
|
|
|
@@ -1,10 +1,17 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
1
3
|
import hashlib
|
|
2
4
|
import tarfile
|
|
3
5
|
from bisect import bisect_right
|
|
6
|
+
from functools import cached_property
|
|
7
|
+
from typing import TYPE_CHECKING, BinaryIO
|
|
4
8
|
from xml.etree import ElementTree
|
|
5
9
|
|
|
6
10
|
from dissect.util.stream import AlignedStream
|
|
7
11
|
|
|
12
|
+
if TYPE_CHECKING:
|
|
13
|
+
from collections.abc import Iterator
|
|
14
|
+
|
|
8
15
|
BLOCK_SIZE = 1024 * 1024
|
|
9
16
|
|
|
10
17
|
|
|
@@ -14,20 +21,17 @@ class XVA:
|
|
|
14
21
|
XenCenter export format. Basically a tar file with "blocks" of 1MB.
|
|
15
22
|
"""
|
|
16
23
|
|
|
17
|
-
def __init__(self, fh):
|
|
24
|
+
def __init__(self, fh: BinaryIO):
|
|
18
25
|
# We don't have to cache tar members, tarfile already does that for us
|
|
19
|
-
self.tar = tarfile.open(fileobj=fh)
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
return self._ova
|
|
29
|
-
|
|
30
|
-
def disks(self):
|
|
26
|
+
self.tar = tarfile.open(fileobj=fh) # noqa: SIM115
|
|
27
|
+
|
|
28
|
+
@cached_property
|
|
29
|
+
def ova(self) -> ElementTree.Element:
|
|
30
|
+
ova_member = self.tar.getmember("ova.xml")
|
|
31
|
+
ova_fh = self.tar.extractfile(ova_member)
|
|
32
|
+
return ElementTree.fromstring(ova_fh.read())
|
|
33
|
+
|
|
34
|
+
def disks(self) -> list[str]:
|
|
31
35
|
return [
|
|
32
36
|
el.text
|
|
33
37
|
for el in self.ova.findall(
|
|
@@ -35,7 +39,7 @@ class XVA:
|
|
|
35
39
|
)
|
|
36
40
|
]
|
|
37
41
|
|
|
38
|
-
def open(self, ref, verify=False):
|
|
42
|
+
def open(self, ref: str, verify: bool = False) -> XVAStream:
|
|
39
43
|
size = int(
|
|
40
44
|
self.ova.find(f"*//member/name[.='id']/../value[.='{ref}']/../..//name[.='virtual_size']/../value").text
|
|
41
45
|
)
|
|
@@ -55,7 +59,7 @@ class XVAStream(AlignedStream):
|
|
|
55
59
|
data for that current offset. For this reason we build a lookup list with offsets.
|
|
56
60
|
"""
|
|
57
61
|
|
|
58
|
-
def __init__(self, xva, ref, size, verify=False):
|
|
62
|
+
def __init__(self, xva: XVA, ref: str, size: int, verify: bool = False):
|
|
59
63
|
self.xva = xva
|
|
60
64
|
self.ref = ref
|
|
61
65
|
self.verify = verify
|
|
@@ -79,7 +83,7 @@ class XVAStream(AlignedStream):
|
|
|
79
83
|
|
|
80
84
|
super().__init__(size, align=BLOCK_SIZE)
|
|
81
85
|
|
|
82
|
-
def _read(self, offset, length):
|
|
86
|
+
def _read(self, offset: int, length: int) -> bytes:
|
|
83
87
|
result = []
|
|
84
88
|
|
|
85
89
|
while length > 0:
|
|
@@ -102,8 +106,8 @@ class XVAStream(AlignedStream):
|
|
|
102
106
|
and hashlib.sha1(buf).hexdigest() != self.xva.tar.extractfile(checksum_member).read().decode()
|
|
103
107
|
):
|
|
104
108
|
raise ValueError(f"Invalid checksum for {checksum_member.name}")
|
|
105
|
-
|
|
106
|
-
|
|
109
|
+
|
|
110
|
+
raise NotImplementedError(f"Unsupported checksum: {checksum_member.name}")
|
|
107
111
|
|
|
108
112
|
result.append(buf)
|
|
109
113
|
|
|
@@ -113,7 +117,7 @@ class XVAStream(AlignedStream):
|
|
|
113
117
|
return b"".join(result)
|
|
114
118
|
|
|
115
119
|
|
|
116
|
-
def _iter_block_files(xva, ref):
|
|
120
|
+
def _iter_block_files(xva: XVA, ref: str) -> Iterator[tuple[int, tarfile.TarInfo, tarfile.TarInfo]]:
|
|
117
121
|
member_index = None
|
|
118
122
|
block_member = None
|
|
119
123
|
checksum_member = None
|
|
@@ -1,6 +1,6 @@
|
|
|
1
|
-
Metadata-Version: 2.
|
|
1
|
+
Metadata-Version: 2.2
|
|
2
2
|
Name: dissect.archive
|
|
3
|
-
Version: 1.4.dev1
|
|
3
|
+
Version: 1.5.dev2
|
|
4
4
|
Summary: A Dissect module implementing parsers for various archive and backup formats
|
|
5
5
|
Author-email: Dissect Team <dissect@fox-it.com>
|
|
6
6
|
License: Affero General Public License v3
|
|
@@ -1,3 +1,4 @@
|
|
|
1
|
+
.git-blame-ignore-revs
|
|
1
2
|
.gitattributes
|
|
2
3
|
COPYRIGHT
|
|
3
4
|
LICENSE
|
|
@@ -22,6 +23,7 @@ dissect/archive/wim.py
|
|
|
22
23
|
dissect/archive/xva.py
|
|
23
24
|
dissect/archive/tools/__init__.py
|
|
24
25
|
dissect/archive/tools/backup.py
|
|
26
|
+
tests/__init__.py
|
|
25
27
|
tests/conftest.py
|
|
26
28
|
tests/test_exceptions.py
|
|
27
29
|
tests/test_vbk.py
|
|
@@ -50,13 +50,56 @@ vma-extract = "dissect.archive.tools.backup:main"
|
|
|
50
50
|
vbk-extract = "dissect.archive.tools.backup:main"
|
|
51
51
|
backup-extract = "dissect.archive.tools.backup:main"
|
|
52
52
|
|
|
53
|
-
[tool.
|
|
53
|
+
[tool.ruff]
|
|
54
54
|
line-length = 120
|
|
55
|
+
required-version = ">=0.9.0"
|
|
55
56
|
|
|
56
|
-
[tool.
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
57
|
+
[tool.ruff.format]
|
|
58
|
+
docstring-code-format = true
|
|
59
|
+
|
|
60
|
+
[tool.ruff.lint]
|
|
61
|
+
select = [
|
|
62
|
+
"F",
|
|
63
|
+
"E",
|
|
64
|
+
"W",
|
|
65
|
+
"I",
|
|
66
|
+
"UP",
|
|
67
|
+
"YTT",
|
|
68
|
+
"ANN",
|
|
69
|
+
"B",
|
|
70
|
+
"C4",
|
|
71
|
+
"DTZ",
|
|
72
|
+
"T10",
|
|
73
|
+
"FA",
|
|
74
|
+
"ISC",
|
|
75
|
+
"G",
|
|
76
|
+
"INP",
|
|
77
|
+
"PIE",
|
|
78
|
+
"PYI",
|
|
79
|
+
"PT",
|
|
80
|
+
"Q",
|
|
81
|
+
"RSE",
|
|
82
|
+
"RET",
|
|
83
|
+
"SLOT",
|
|
84
|
+
"SIM",
|
|
85
|
+
"TID",
|
|
86
|
+
"TCH",
|
|
87
|
+
"PTH",
|
|
88
|
+
"PLC",
|
|
89
|
+
"TRY",
|
|
90
|
+
"FLY",
|
|
91
|
+
"PERF",
|
|
92
|
+
"FURB",
|
|
93
|
+
"RUF",
|
|
94
|
+
]
|
|
95
|
+
ignore = ["E203", "B904", "UP024", "ANN002", "ANN003", "ANN204", "ANN401", "SIM105", "TRY003"]
|
|
96
|
+
|
|
97
|
+
[tool.ruff.lint.per-file-ignores]
|
|
98
|
+
"tests/docs/**" = ["INP001"]
|
|
99
|
+
|
|
100
|
+
[tool.ruff.lint.isort]
|
|
101
|
+
known-first-party = ["dissect.archive"]
|
|
102
|
+
known-third-party = ["dissect"]
|
|
60
103
|
|
|
61
104
|
[tool.setuptools]
|
|
62
105
|
license-files = ["LICENSE", "COPYRIGHT"]
|
|
File without changes
|
|
@@ -1,16 +1,21 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
1
3
|
import gzip
|
|
2
|
-
import
|
|
3
|
-
from typing import
|
|
4
|
+
from pathlib import Path
|
|
5
|
+
from typing import TYPE_CHECKING, BinaryIO
|
|
4
6
|
|
|
5
7
|
import pytest
|
|
6
8
|
|
|
9
|
+
if TYPE_CHECKING:
|
|
10
|
+
from collections.abc import Iterator
|
|
11
|
+
|
|
7
12
|
|
|
8
|
-
def absolute_path(filename) ->
|
|
9
|
-
return
|
|
13
|
+
def absolute_path(filename: str) -> Path:
|
|
14
|
+
return Path(__file__).parent / filename
|
|
10
15
|
|
|
11
16
|
|
|
12
17
|
def open_file(name: str, mode: str = "rb") -> Iterator[BinaryIO]:
|
|
13
|
-
with
|
|
18
|
+
with absolute_path(name).open(mode) as f:
|
|
14
19
|
yield f
|
|
15
20
|
|
|
16
21
|
|
|
@@ -1,3 +1,5 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
1
3
|
import hashlib
|
|
2
4
|
import struct
|
|
3
5
|
from typing import BinaryIO
|
|
@@ -14,6 +16,7 @@ from dissect.archive.vbk import (
|
|
|
14
16
|
|
|
15
17
|
|
|
16
18
|
def test_vbk_version_9(vbk9: BinaryIO) -> None:
|
|
19
|
+
"""test VBK parsing for version 9 files"""
|
|
17
20
|
vbk = VBK(vbk9)
|
|
18
21
|
|
|
19
22
|
assert vbk.format_version == 9
|
|
@@ -49,6 +52,7 @@ def test_vbk_version_9(vbk9: BinaryIO) -> None:
|
|
|
49
52
|
|
|
50
53
|
|
|
51
54
|
def test_vbk_version_13(vbk13: BinaryIO) -> None:
|
|
55
|
+
"""test VBK parsing for version 13 files"""
|
|
52
56
|
vbk = VBK(vbk13)
|
|
53
57
|
|
|
54
58
|
assert vbk.format_version == 13
|
|
@@ -1,3 +1,5 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
1
3
|
import hashlib
|
|
2
4
|
from typing import BinaryIO
|
|
3
5
|
|
|
@@ -11,7 +13,7 @@ def test_wim(basic_wim: BinaryIO) -> None:
|
|
|
11
13
|
assert len(images) == 1
|
|
12
14
|
|
|
13
15
|
image = images[0]
|
|
14
|
-
assert sorted(
|
|
16
|
+
assert sorted(image.root.listdir().keys()) == ["ads.txt", "dir", "file.txt", "link.txt"]
|
|
15
17
|
|
|
16
18
|
entry = image.get("file.txt")
|
|
17
19
|
assert entry.is_file()
|
|
@@ -42,7 +44,7 @@ def test_wim(basic_wim: BinaryIO) -> None:
|
|
|
42
44
|
assert not entry.is_file()
|
|
43
45
|
assert entry.is_dir()
|
|
44
46
|
assert not entry.is_reparse_point()
|
|
45
|
-
assert sorted(
|
|
47
|
+
assert sorted(entry.listdir().keys()) == ["another.txt"]
|
|
46
48
|
|
|
47
49
|
entry = image.get("dir/another.txt")
|
|
48
50
|
assert entry.is_file()
|
|
@@ -32,23 +32,17 @@ commands =
|
|
|
32
32
|
[testenv:fix]
|
|
33
33
|
package = skip
|
|
34
34
|
deps =
|
|
35
|
-
|
|
36
|
-
isort==5.11.4
|
|
35
|
+
ruff==0.9.2
|
|
37
36
|
commands =
|
|
38
|
-
|
|
39
|
-
isort dissect tests
|
|
37
|
+
ruff format dissect tests
|
|
40
38
|
|
|
41
39
|
[testenv:lint]
|
|
42
40
|
package = skip
|
|
43
41
|
deps =
|
|
44
|
-
|
|
45
|
-
flake8
|
|
46
|
-
flake8-black
|
|
47
|
-
flake8-isort
|
|
48
|
-
isort==5.11.4
|
|
42
|
+
ruff==0.9.2
|
|
49
43
|
vermin
|
|
50
44
|
commands =
|
|
51
|
-
|
|
45
|
+
ruff check dissect tests
|
|
52
46
|
vermin -t=3.9- --no-tips --lint dissect tests
|
|
53
47
|
|
|
54
48
|
[flake8]
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{dissect_archive-1.4.dev1 → dissect_archive-1.5.dev2}/dissect.archive.egg-info/dependency_links.txt
RENAMED
|
File without changes
|
{dissect_archive-1.4.dev1 → dissect_archive-1.5.dev2}/dissect.archive.egg-info/entry_points.txt
RENAMED
|
File without changes
|
|
File without changes
|
{dissect_archive-1.4.dev1 → dissect_archive-1.5.dev2}/dissect.archive.egg-info/top_level.txt
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|