archae 2026.2.0b1__py3-none-any.whl → 2026.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- archae/cli.py +1 -2
- archae/config.py +33 -4
- archae/default_settings.toml +1 -0
- archae/extractor.py +136 -59
- archae/options.yaml +7 -0
- archae/util/archiver/peazip.py +9 -2
- archae/util/archiver/seven_zip.py +17 -3
- archae/util/archiver/unar.py +10 -3
- archae/util/file_tracker.py +14 -3
- archae/util/lists.py +58 -0
- {archae-2026.2.0b1.dist-info → archae-2026.3.dist-info}/METADATA +5 -4
- archae-2026.3.dist-info/RECORD +24 -0
- {archae-2026.2.0b1.dist-info → archae-2026.3.dist-info}/WHEEL +1 -1
- archae-2026.2.0b1.dist-info/RECORD +0 -23
- {archae-2026.2.0b1.dist-info → archae-2026.3.dist-info}/entry_points.txt +0 -0
archae/cli.py
CHANGED
|
@@ -9,7 +9,7 @@ from pathlib import Path
|
|
|
9
9
|
|
|
10
10
|
import rich_click as click
|
|
11
11
|
|
|
12
|
-
from archae.config import apply_options,
|
|
12
|
+
from archae.config import apply_options, get_options
|
|
13
13
|
from archae.extractor import ArchiveExtractor
|
|
14
14
|
from archae.util.tool_manager import ToolManager
|
|
15
15
|
|
|
@@ -72,7 +72,6 @@ def extract(
|
|
|
72
72
|
# Apply any options from the command line, then convert any convertible settings
|
|
73
73
|
if options:
|
|
74
74
|
apply_options(options)
|
|
75
|
-
convert_settings()
|
|
76
75
|
|
|
77
76
|
# Locate external tools
|
|
78
77
|
ToolManager.locate_tools()
|
archae/config.py
CHANGED
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
"""Runtime config management (default, userconfig and envvars)."""
|
|
2
2
|
|
|
3
|
+
import ast
|
|
3
4
|
import importlib
|
|
4
5
|
import typing
|
|
5
6
|
from pathlib import Path
|
|
@@ -66,6 +67,8 @@ def get_converter(converter_def: str) -> typing.Callable:
|
|
|
66
67
|
return float
|
|
67
68
|
if converter_def == "int":
|
|
68
69
|
return int
|
|
70
|
+
if converter_def == "bool":
|
|
71
|
+
return ast.literal_eval
|
|
69
72
|
|
|
70
73
|
# Split the definition into module path and class name
|
|
71
74
|
module_name, class_name = converter_def.split(":")
|
|
@@ -97,11 +100,37 @@ def apply_options(option_list: list[tuple[str, str]]) -> None:
|
|
|
97
100
|
pass
|
|
98
101
|
|
|
99
102
|
|
|
100
|
-
def convert_settings() ->
|
|
101
|
-
"""Convert settings using their defined converters.
|
|
103
|
+
def convert_settings(settings_dict: dict) -> dict:
|
|
104
|
+
"""Convert settings using their defined converters.
|
|
105
|
+
|
|
106
|
+
Args:
|
|
107
|
+
settings_dict (dict): The settings dictionary to convert.
|
|
108
|
+
|
|
109
|
+
Returns:
|
|
110
|
+
dict: The converted settings dictionary.
|
|
111
|
+
"""
|
|
102
112
|
options = get_options()
|
|
103
113
|
for key in options:
|
|
104
114
|
option_def = options[key]
|
|
105
|
-
if "converter" in option_def:
|
|
115
|
+
if "converter" in option_def and key in settings_dict:
|
|
106
116
|
converter = get_converter(option_def["converter"])
|
|
107
|
-
|
|
117
|
+
settings_dict[key] = converter(settings_dict[key])
|
|
118
|
+
return settings_dict
|
|
119
|
+
|
|
120
|
+
|
|
121
|
+
def get_settings() -> dict:
|
|
122
|
+
"""Get the current settings after converting them.
|
|
123
|
+
|
|
124
|
+
Returns:
|
|
125
|
+
dict: The current settings as a dictionary.
|
|
126
|
+
"""
|
|
127
|
+
return convert_settings(dict(settings))
|
|
128
|
+
|
|
129
|
+
|
|
130
|
+
def get_default_settings() -> dict:
|
|
131
|
+
"""Get the default settings after converting them.
|
|
132
|
+
|
|
133
|
+
Returns:
|
|
134
|
+
dict: The default settings as a dictionary.
|
|
135
|
+
"""
|
|
136
|
+
return convert_settings(dict(default_settings))
|
archae/default_settings.toml
CHANGED
archae/extractor.py
CHANGED
|
@@ -9,7 +9,7 @@ from typing import TYPE_CHECKING
|
|
|
9
9
|
|
|
10
10
|
import magic
|
|
11
11
|
|
|
12
|
-
from archae.config import apply_options,
|
|
12
|
+
from archae.config import apply_options, get_default_settings, get_settings
|
|
13
13
|
from archae.util.file_tracker import FileTracker
|
|
14
14
|
from archae.util.tool_manager import ToolManager
|
|
15
15
|
|
|
@@ -17,6 +17,7 @@ if TYPE_CHECKING:
|
|
|
17
17
|
from pathlib import Path
|
|
18
18
|
|
|
19
19
|
from archae.util.archiver.base_archiver import BaseArchiver
|
|
20
|
+
from archae.util.lists import skip_delete_extensions, skip_delete_mimetypes
|
|
20
21
|
|
|
21
22
|
|
|
22
23
|
class WarningAccumulator(logging.Handler):
|
|
@@ -55,6 +56,8 @@ class ArchiveExtractor:
|
|
|
55
56
|
shutil.rmtree(self.extract_dir)
|
|
56
57
|
self.extract_dir.mkdir(exist_ok=True)
|
|
57
58
|
self.file_tracker = FileTracker()
|
|
59
|
+
if ToolManager.get_tools() == {}:
|
|
60
|
+
ToolManager.locate_tools()
|
|
58
61
|
|
|
59
62
|
def handle_file(self, file_path: Path) -> None:
|
|
60
63
|
"""Handle a file given its path.
|
|
@@ -77,74 +80,75 @@ class ArchiveExtractor:
|
|
|
77
80
|
file_size_bytes = file_path.stat().st_size
|
|
78
81
|
self.file_tracker.track_file(base_hash, file_size_bytes)
|
|
79
82
|
self.file_tracker.track_file_path(base_hash, file_path)
|
|
80
|
-
self.file_tracker.
|
|
81
|
-
|
|
82
|
-
)
|
|
83
|
-
self.file_tracker.add_metadata_to_hash(
|
|
83
|
+
self.file_tracker.add_metadata(base_hash, "type", magic.from_file(file_path))
|
|
84
|
+
self.file_tracker.add_metadata(
|
|
84
85
|
base_hash, "type_mime", magic.from_file(file_path, mime=True)
|
|
85
86
|
)
|
|
86
87
|
extension = file_path.suffix.lstrip(".").lower()
|
|
87
|
-
self.file_tracker.
|
|
88
|
+
self.file_tracker.add_metadata(base_hash, "extension", extension)
|
|
88
89
|
is_file_archive = self._is_archive(base_hash)
|
|
89
|
-
self.file_tracker.
|
|
90
|
+
self.file_tracker.add_metadata(base_hash, "is_archive", is_file_archive)
|
|
90
91
|
if is_file_archive:
|
|
91
|
-
|
|
92
|
+
settings_dict = get_settings()
|
|
93
|
+
if settings_dict["MAX_DEPTH"] == 0 or depth < settings_dict["MAX_DEPTH"]:
|
|
92
94
|
archiver = self._get_archiver_for_file(base_hash)
|
|
93
|
-
if archiver:
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
95
|
+
if not archiver:
|
|
96
|
+
logger.warning(
|
|
97
|
+
"NO_ARCHIVER: No suitable archiver found for file: %s",
|
|
98
|
+
file_path,
|
|
97
99
|
)
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
100
|
+
return
|
|
101
|
+
|
|
102
|
+
try:
|
|
103
|
+
extracted_size = archiver.get_archive_uncompressed_size(file_path)
|
|
104
|
+
except RuntimeError as e:
|
|
105
|
+
logger.warning(
|
|
106
|
+
"SIZE_RETRIEVAL_FAILED: Could not retrieve size for archive %s: %s",
|
|
107
|
+
file_path,
|
|
108
|
+
str(e),
|
|
101
109
|
)
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
extracted_size,
|
|
117
|
-
self.file_tracker.get_tracked_file_size(),
|
|
118
|
-
settings["MAX_TOTAL_SIZE_BYTES"],
|
|
119
|
-
)
|
|
120
|
-
elif compression_ratio < settings["MIN_ARCHIVE_RATIO"]:
|
|
121
|
-
logger.warning(
|
|
122
|
-
"MIN_ARCHIVE_RATIO: Skipped archive %s because compression ratio %.5f is less than MIN_ARCHIVE_RATIO %s",
|
|
110
|
+
return
|
|
111
|
+
self.file_tracker.add_metadata(
|
|
112
|
+
base_hash, "extracted_size", extracted_size
|
|
113
|
+
)
|
|
114
|
+
compression_ratio = extracted_size / file_size_bytes
|
|
115
|
+
self.file_tracker.add_metadata(
|
|
116
|
+
base_hash, "overall_compression_ratio", compression_ratio
|
|
117
|
+
)
|
|
118
|
+
|
|
119
|
+
if self.__should_extract_archive(base_hash, file_path):
|
|
120
|
+
try:
|
|
121
|
+
extraction_dir = self.extract_dir / base_hash
|
|
122
|
+
logger.info(
|
|
123
|
+
"Extracting archive %s to %s",
|
|
123
124
|
file_path,
|
|
124
|
-
|
|
125
|
-
settings["MIN_ARCHIVE_RATIO"],
|
|
125
|
+
extraction_dir,
|
|
126
126
|
)
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
< settings["MIN_DISK_FREE_SPACE"]
|
|
130
|
-
):
|
|
127
|
+
archiver.extract_archive(file_path, extraction_dir)
|
|
128
|
+
except RuntimeError as e:
|
|
131
129
|
logger.warning(
|
|
132
|
-
"
|
|
130
|
+
"EXTRACTION_FAILED: Extraction failed for archive %s: %s",
|
|
133
131
|
file_path,
|
|
134
|
-
|
|
135
|
-
self.extract_dir,
|
|
132
|
+
str(e),
|
|
136
133
|
)
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
|
|
134
|
+
return
|
|
135
|
+
child_files = self._list_child_files(extraction_dir)
|
|
136
|
+
for child_file in child_files:
|
|
137
|
+
self.__handle_file(child_file, depth + 1)
|
|
138
|
+
|
|
139
|
+
if self.__should_delete_archive(base_hash, file_path):
|
|
140
|
+
try:
|
|
141
|
+
file_path.unlink()
|
|
142
|
+
logger.info(
|
|
143
|
+
"Deleted archive %s after extraction as per settings.",
|
|
144
|
+
file_path,
|
|
145
|
+
)
|
|
146
|
+
except (PermissionError, OSError) as e:
|
|
147
|
+
logger.warning(
|
|
148
|
+
"DELETE_FAILED: Could not delete archive %s after extraction: %s",
|
|
149
|
+
file_path,
|
|
150
|
+
str(e),
|
|
151
|
+
)
|
|
148
152
|
else:
|
|
149
153
|
logger.warning(
|
|
150
154
|
"MAX_DEPTH: File %s is not extracted; max depth reached.", file_path
|
|
@@ -160,7 +164,7 @@ class ArchiveExtractor:
|
|
|
160
164
|
bool: True if the file is an archive, otherwise False.
|
|
161
165
|
|
|
162
166
|
"""
|
|
163
|
-
metadata = self.file_tracker.
|
|
167
|
+
metadata = self.file_tracker.get_file_metadata(file_hash)
|
|
164
168
|
mime_type = metadata.get("type_mime", "").lower()
|
|
165
169
|
extension = metadata.get("extension", "").lower()
|
|
166
170
|
|
|
@@ -179,7 +183,7 @@ class ArchiveExtractor:
|
|
|
179
183
|
Returns:
|
|
180
184
|
str | None: The name of the archiver tool if found, otherwise None.
|
|
181
185
|
"""
|
|
182
|
-
metadata = self.file_tracker.
|
|
186
|
+
metadata = self.file_tracker.get_file_metadata(file_hash)
|
|
183
187
|
mime_type = metadata.get("type_mime", "").lower()
|
|
184
188
|
extension = metadata.get("extension", "").lower()
|
|
185
189
|
|
|
@@ -235,7 +239,7 @@ class ArchiveExtractor:
|
|
|
235
239
|
Returns:
|
|
236
240
|
dict: Dictionary of default settings.
|
|
237
241
|
"""
|
|
238
|
-
return
|
|
242
|
+
return get_default_settings()
|
|
239
243
|
|
|
240
244
|
def apply_settings(self, option_list: list[tuple[str, str]]) -> None:
|
|
241
245
|
"""Apply a list of settings options.
|
|
@@ -247,3 +251,76 @@ class ArchiveExtractor:
|
|
|
247
251
|
extractor.apply_settings([("MAX_ARCHIVE_SIZE_BYTES", "5000000000")])
|
|
248
252
|
"""
|
|
249
253
|
apply_options(option_list)
|
|
254
|
+
|
|
255
|
+
def __should_extract_archive(self, file_hash: str, file_path: Path) -> bool:
|
|
256
|
+
"""Determine whether an archive should be extracted based on its metadata and current settings."""
|
|
257
|
+
settings_dict = get_settings()
|
|
258
|
+
metadata = self.file_tracker.get_file_metadata(file_hash)
|
|
259
|
+
extracted_size = metadata.get("extracted_size", 0)
|
|
260
|
+
if extracted_size > settings_dict["MAX_ARCHIVE_SIZE_BYTES"]:
|
|
261
|
+
logger.warning(
|
|
262
|
+
"MAX_ARCHIVE_SIZE_BYTES: Skipped archive %s because expected size %s is greater than MAX_ARCHIVE_SIZE_BYTES %s",
|
|
263
|
+
file_path,
|
|
264
|
+
extracted_size,
|
|
265
|
+
settings_dict["MAX_ARCHIVE_SIZE_BYTES"],
|
|
266
|
+
)
|
|
267
|
+
return False
|
|
268
|
+
|
|
269
|
+
total_extracted = self.file_tracker.get_total_tracked_file_size()
|
|
270
|
+
if total_extracted + extracted_size > settings_dict["MAX_TOTAL_SIZE_BYTES"]:
|
|
271
|
+
logger.warning(
|
|
272
|
+
"MAX_TOTAL_SIZE_BYTES: Skipped archive %s because expected size %s + current tracked files %s is greater than MAX_TOTAL_SIZE_BYTES %s",
|
|
273
|
+
file_path,
|
|
274
|
+
extracted_size,
|
|
275
|
+
total_extracted,
|
|
276
|
+
settings_dict["MAX_TOTAL_SIZE_BYTES"],
|
|
277
|
+
)
|
|
278
|
+
return False
|
|
279
|
+
compression_ratio = metadata.get("overall_compression_ratio", 0)
|
|
280
|
+
if compression_ratio < settings_dict["MIN_ARCHIVE_RATIO"]:
|
|
281
|
+
logger.warning(
|
|
282
|
+
"MIN_ARCHIVE_RATIO: Skipped archive %s because compression ratio %.5f is less than MIN_ARCHIVE_RATIO %s",
|
|
283
|
+
file_path,
|
|
284
|
+
compression_ratio,
|
|
285
|
+
settings_dict["MIN_ARCHIVE_RATIO"],
|
|
286
|
+
)
|
|
287
|
+
return False
|
|
288
|
+
if (
|
|
289
|
+
shutil.disk_usage(self.extract_dir).free - extracted_size
|
|
290
|
+
< settings_dict["MIN_DISK_FREE_SPACE"]
|
|
291
|
+
):
|
|
292
|
+
logger.warning(
|
|
293
|
+
"MIN_DISK_FREE_SPACE: Skipped archive %s because extracting it would leave less than MIN_DISK_FREE_SPACE %s bytes free at extraction location %s",
|
|
294
|
+
file_path,
|
|
295
|
+
settings_dict["MIN_DISK_FREE_SPACE"],
|
|
296
|
+
self.extract_dir,
|
|
297
|
+
)
|
|
298
|
+
return False
|
|
299
|
+
return True
|
|
300
|
+
|
|
301
|
+
def __should_delete_archive(self, file_hash: str, file_path: Path) -> bool:
|
|
302
|
+
"""Determine whether an archive should be deleted after extraction based on its metadata and current settings."""
|
|
303
|
+
settings_dict = get_settings()
|
|
304
|
+
if not settings_dict["DELETE_ARCHIVES_AFTER_EXTRACTION"]:
|
|
305
|
+
return False
|
|
306
|
+
|
|
307
|
+
metadata = self.file_tracker.get_file_metadata(file_hash)
|
|
308
|
+
extension = metadata.get("extension", "").lower()
|
|
309
|
+
if extension in skip_delete_extensions:
|
|
310
|
+
logger.warning(
|
|
311
|
+
"SKIP_DELETE_EXTENSION: Archive %s not deleted after extraction due to its extension '%s' being in the skip list.",
|
|
312
|
+
file_path,
|
|
313
|
+
extension,
|
|
314
|
+
)
|
|
315
|
+
return False
|
|
316
|
+
|
|
317
|
+
mime_type = metadata.get("type_mime", "").lower()
|
|
318
|
+
if mime_type in skip_delete_mimetypes:
|
|
319
|
+
logger.warning(
|
|
320
|
+
"SKIP_DELETE_MIMETYPE: Archive %s not deleted after extraction due to its mime type '%s' being in the skip list.",
|
|
321
|
+
file_path,
|
|
322
|
+
mime_type,
|
|
323
|
+
)
|
|
324
|
+
return False
|
|
325
|
+
|
|
326
|
+
return True
|
archae/options.yaml
CHANGED
archae/util/archiver/peazip.py
CHANGED
|
@@ -31,7 +31,7 @@ class PeazipArchiver(BaseArchiver):
|
|
|
31
31
|
"chi",
|
|
32
32
|
"chq",
|
|
33
33
|
"pptx",
|
|
34
|
-
"pptm
|
|
34
|
+
"pptm",
|
|
35
35
|
"xlsx",
|
|
36
36
|
"xlsm",
|
|
37
37
|
"docx",
|
|
@@ -145,7 +145,14 @@ class PeazipArchiver(BaseArchiver):
|
|
|
145
145
|
str(archive_path),
|
|
146
146
|
str(extract_dir),
|
|
147
147
|
]
|
|
148
|
-
|
|
148
|
+
try:
|
|
149
|
+
subprocess.run(command, check=True, capture_output=True, text=True) # noqa: S603
|
|
150
|
+
except subprocess.CalledProcessError as e:
|
|
151
|
+
msg = (
|
|
152
|
+
f"PeaZip extraction failed for archive {archive_path} "
|
|
153
|
+
f"with exit code {e.returncode}: {e.stderr}"
|
|
154
|
+
)
|
|
155
|
+
raise RuntimeError(msg) from e
|
|
149
156
|
|
|
150
157
|
def get_archive_uncompressed_size(self, archive_path: Path) -> int: # noqa: ARG002
|
|
151
158
|
"""Get the uncompressed size of the contents.
|
|
@@ -46,7 +46,7 @@ class SevenZipArchiver(BaseArchiver):
|
|
|
46
46
|
"zip",
|
|
47
47
|
"zipx",
|
|
48
48
|
"appimage",
|
|
49
|
-
"dmg
|
|
49
|
+
"dmg",
|
|
50
50
|
"img",
|
|
51
51
|
"arj",
|
|
52
52
|
"cpio",
|
|
@@ -176,7 +176,14 @@ class SevenZipArchiver(BaseArchiver):
|
|
|
176
176
|
str(archive_path),
|
|
177
177
|
f"-o{extract_dir!s}",
|
|
178
178
|
]
|
|
179
|
-
|
|
179
|
+
try:
|
|
180
|
+
subprocess.run(command, check=True, capture_output=True, text=True) # noqa: S603
|
|
181
|
+
except subprocess.CalledProcessError as e:
|
|
182
|
+
msg = (
|
|
183
|
+
f"7zip extraction failed for archive {archive_path} "
|
|
184
|
+
f"with exit code {e.returncode}: {e.stderr}"
|
|
185
|
+
)
|
|
186
|
+
raise RuntimeError(msg) from e
|
|
180
187
|
|
|
181
188
|
def get_archive_uncompressed_size(self, archive_path: Path) -> int:
|
|
182
189
|
"""Get the uncompressed size of the contents.
|
|
@@ -188,7 +195,14 @@ class SevenZipArchiver(BaseArchiver):
|
|
|
188
195
|
int: The size of the contents
|
|
189
196
|
"""
|
|
190
197
|
command: list[str] = [str(self.executable_path), "l", "-slt", str(archive_path)]
|
|
191
|
-
|
|
198
|
+
try:
|
|
199
|
+
result = subprocess.run(command, check=True, capture_output=True, text=True) # noqa: S603
|
|
200
|
+
except subprocess.CalledProcessError as e:
|
|
201
|
+
msg = (
|
|
202
|
+
f"7zip size retrieval failed for archive {archive_path} "
|
|
203
|
+
f"with exit code {e.returncode}: {e.stderr}"
|
|
204
|
+
)
|
|
205
|
+
raise RuntimeError(msg) from e
|
|
192
206
|
|
|
193
207
|
result_lines = str(result.stdout).splitlines()
|
|
194
208
|
exploded_size = 0
|
archae/util/archiver/unar.py
CHANGED
|
@@ -24,9 +24,9 @@ class UnarArchiver(BaseArchiver):
|
|
|
24
24
|
"deb",
|
|
25
25
|
"cab",
|
|
26
26
|
"pptx",
|
|
27
|
-
"pptm
|
|
28
|
-
"xlsx ",
|
|
27
|
+
"pptm",
|
|
29
28
|
"xlsm",
|
|
29
|
+
"xlsx",
|
|
30
30
|
"docx",
|
|
31
31
|
"docm",
|
|
32
32
|
"7z",
|
|
@@ -144,7 +144,14 @@ class UnarArchiver(BaseArchiver):
|
|
|
144
144
|
str(extract_dir),
|
|
145
145
|
str(archive_path),
|
|
146
146
|
]
|
|
147
|
-
|
|
147
|
+
try:
|
|
148
|
+
subprocess.run(command, check=True, capture_output=True, text=True) # noqa: S603
|
|
149
|
+
except subprocess.CalledProcessError as e:
|
|
150
|
+
msg = (
|
|
151
|
+
f"unar extraction failed for archive {archive_path} "
|
|
152
|
+
f"with exit code {e.returncode}: {e.stderr}"
|
|
153
|
+
)
|
|
154
|
+
raise RuntimeError(msg) from e
|
|
148
155
|
|
|
149
156
|
def get_archive_uncompressed_size(self, archive_path: Path) -> int: # noqa: ARG002
|
|
150
157
|
"""Get the uncompressed size of the contents.
|
archae/util/file_tracker.py
CHANGED
|
@@ -39,7 +39,18 @@ class FileTracker:
|
|
|
39
39
|
"""
|
|
40
40
|
return file_hash in self.tracked_files
|
|
41
41
|
|
|
42
|
-
def
|
|
42
|
+
def get_file_size(self, file_hash: str) -> int:
|
|
43
|
+
"""Get the size for a tracked file by its hash.
|
|
44
|
+
|
|
45
|
+
Args:
|
|
46
|
+
file_hash (str): The hash of the file.
|
|
47
|
+
|
|
48
|
+
Returns:
|
|
49
|
+
int: The size of the tracked file.
|
|
50
|
+
"""
|
|
51
|
+
return self.tracked_files.get(file_hash, {}).get("size", 0)
|
|
52
|
+
|
|
53
|
+
def get_file_metadata(self, file_hash: str) -> dict:
|
|
43
54
|
"""Get metadata for a tracked file by its hash.
|
|
44
55
|
|
|
45
56
|
Args:
|
|
@@ -63,7 +74,7 @@ class FileTracker:
|
|
|
63
74
|
if file_path not in self.tracked_files[file_hash]["paths"]:
|
|
64
75
|
self.tracked_files[file_hash]["paths"].append(file_path)
|
|
65
76
|
|
|
66
|
-
def
|
|
77
|
+
def add_metadata(self, file_hash: str, key: str, value: Any) -> None:
|
|
67
78
|
"""Add metadata to a tracked file.
|
|
68
79
|
|
|
69
80
|
Args:
|
|
@@ -73,7 +84,7 @@ class FileTracker:
|
|
|
73
84
|
"""
|
|
74
85
|
self.tracked_files[file_hash]["metadata"][key] = value
|
|
75
86
|
|
|
76
|
-
def
|
|
87
|
+
def get_total_tracked_file_size(self) -> int:
|
|
77
88
|
"""Get the total size of all tracked files.
|
|
78
89
|
|
|
79
90
|
Returns:
|
archae/util/lists.py
ADDED
|
@@ -0,0 +1,58 @@
|
|
|
1
|
+
"""Helper lists for various purposes."""
|
|
2
|
+
|
|
3
|
+
skip_delete_extensions = [
|
|
4
|
+
"aar",
|
|
5
|
+
"appimage",
|
|
6
|
+
"cab",
|
|
7
|
+
"chi",
|
|
8
|
+
"chm",
|
|
9
|
+
"chq",
|
|
10
|
+
"chw",
|
|
11
|
+
"crx",
|
|
12
|
+
"deb",
|
|
13
|
+
"docm",
|
|
14
|
+
"docx",
|
|
15
|
+
"edb",
|
|
16
|
+
"edp",
|
|
17
|
+
"edr",
|
|
18
|
+
"esd",
|
|
19
|
+
"exe",
|
|
20
|
+
"ipa",
|
|
21
|
+
"iso",
|
|
22
|
+
"lib",
|
|
23
|
+
"msi",
|
|
24
|
+
"nsi",
|
|
25
|
+
"nsis",
|
|
26
|
+
"pptm",
|
|
27
|
+
"pptx",
|
|
28
|
+
"rpm",
|
|
29
|
+
"s7z",
|
|
30
|
+
"sitx",
|
|
31
|
+
"swm",
|
|
32
|
+
"ear",
|
|
33
|
+
"jar",
|
|
34
|
+
"war",
|
|
35
|
+
"xlsm",
|
|
36
|
+
"xlsx",
|
|
37
|
+
"xpi",
|
|
38
|
+
"zipx",
|
|
39
|
+
]
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
skip_delete_mimetypes = [
|
|
43
|
+
"application/java-archive",
|
|
44
|
+
"application/vnd.android.package-archive",
|
|
45
|
+
"application/vnd.debian.binary-package",
|
|
46
|
+
"application/vnd.ms-cab-compressed",
|
|
47
|
+
"application/vnd.ms-htmlhelp",
|
|
48
|
+
"application/vnd.openxmlformats-officedocument.presentationml.presentation",
|
|
49
|
+
"application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
|
|
50
|
+
"application/vnd.openxmlformats-officedocument.wordprocessingml.document",
|
|
51
|
+
"application/x-chrome-extension",
|
|
52
|
+
"application/x-iso9660-image",
|
|
53
|
+
"application/x-ole-storage",
|
|
54
|
+
"application/x-rpm",
|
|
55
|
+
"application/x-sitx",
|
|
56
|
+
"application/x-stuffitx",
|
|
57
|
+
"application/x-xpinstall",
|
|
58
|
+
]
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.3
|
|
2
2
|
Name: archae
|
|
3
|
-
Version: 2026.2.0b1
|
|
3
|
+
Version: 2026.3
|
|
4
4
|
Summary: Archae explodes archives.
|
|
5
5
|
Keywords:
|
|
6
6
|
Author: Shawn McNaughton
|
|
@@ -57,7 +57,7 @@ Description-Content-Type: text/markdown
|
|
|
57
57
|
|
|
58
58
|

|
|
59
59
|
|
|
60
|
-
[](https://
|
|
60
|
+
[](https://pypi.org/project/archae/)
|
|
61
61
|
[](https://pypi.org/project/archae/)
|
|
62
62
|
[](https://results.pre-commit.ci/latest/github/shawngmc/archae/main)
|
|
63
63
|
[](https://github.com/shawngmc/archae/actions/workflows/test.yml)
|
|
@@ -73,8 +73,8 @@ Archae explodes archives.
|
|
|
73
73
|
Every once in a while, I run into an issue: multiple layers of archives. The reasons vary, but examples would include:
|
|
74
74
|
|
|
75
75
|
- Searching for something in a ZIP of folders that contained a ZIP with a CD image in it
|
|
76
|
-
- Running a malware scan and finding an obscure archive format was missed and not even flagged
|
|
77
|
-
|
|
76
|
+
- Running a malware scan and finding an obscure archive format was missed and not even flagged.
|
|
77
|
+
- Meanwhile, I want to make sure I don't fill my disk, especially if an archive bomb (more commonly known as a ZIP bomb) has been jammed in somewhere. They're only funny the first time. :D
|
|
78
78
|
|
|
79
79
|
## Features
|
|
80
80
|
|
|
@@ -88,6 +88,7 @@ Every once and a while, I run into an issue: multiple layers of archives. The re
|
|
|
88
88
|
- MIN_ARCHIVE_RATIO - ensures very-high-compression-ratio archives are stopped
|
|
89
89
|
- MIN_DISK_FREE_SPACE - minimum free space at the extraction location
|
|
90
90
|
- MAX_DEPTH - allow setting a maximum archive depth to traverse
|
|
91
|
+
- DELETE_ARCHIVES_AFTER_EXTRACTION - delete pure archive types after extraction
|
|
91
92
|
|
|
92
93
|
## Installation
|
|
93
94
|
|
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
archae/__init__.py,sha256=O_HNvpNVsJ5LZPNPLvuHhHdOq0b6M0LnWqycB505mkQ,111
|
|
2
|
+
archae/__main__.py,sha256=l3eO5dEs1cR_hDziOpnW1PYzoRL2siYm81wgvftyigg,172
|
|
3
|
+
archae/cli.py,sha256=NqNUsncmXNYrfyE97jqCzKQ0tvOJz4Bm0_hbSnKzOm0,5698
|
|
4
|
+
archae/config.py,sha256=WQ1-qoEZhDoSJ5RIO3VlmMX_e5L_KXTfXEZaMh_LWsM,3673
|
|
5
|
+
archae/default_settings.toml,sha256=jHyXfD6D_SCzEHuhddSqz6HgwUOfHyzIAJxbbYO899A,271
|
|
6
|
+
archae/extractor.py,sha256=P-NaPh_YCbMfgWZfsHfPyxuAoWnao-xi6cFLqK1me4I,12900
|
|
7
|
+
archae/options.yaml,sha256=Dn9TwrWwo41aGtjepqbGLP7zcMdTuK_q8VTqEyFtj8I,1090
|
|
8
|
+
archae/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
9
|
+
archae/util/__init__.py,sha256=HlENK1XfnwnYMrlkvVJTjs-6W7RLLUJOoyFcOb7njUE,34
|
|
10
|
+
archae/util/archiver/__init__.py,sha256=baayr_wh2MfRaKKiVuaUxtYJcMzKR82Uf1amcU7QtzM,202
|
|
11
|
+
archae/util/archiver/base_archiver.py,sha256=xEHynzmhwZdMl2fEDj9h-YaH9ZQEeaugF4H_ILoOxTY,1629
|
|
12
|
+
archae/util/archiver/peazip.py,sha256=Kr5J1VUHWz_1Z8oiU7RZjwwOmQJbTPWO6kBMxrs0Inw,4331
|
|
13
|
+
archae/util/archiver/seven_zip.py,sha256=7W2eGv1aOeSrgU02hBNnDrlRCN6KbIl8_9iNw7s2wzA,5538
|
|
14
|
+
archae/util/archiver/unar.py,sha256=WQohelgxKcC4jTYcIuyzTCe9cZl65ru3i8IWnD5IziM,4280
|
|
15
|
+
archae/util/converter/file_size.py,sha256=BdcYzpdvCDq3YH1eYW1-zwnQCxv9mBzG4DuqHY2dt6o,1984
|
|
16
|
+
archae/util/enum/__init__.py,sha256=IvjtVopATKLAHDjOpblaExy2yXwIzweX0HoUrQWcpkM,109
|
|
17
|
+
archae/util/enum/byte_scale.py,sha256=5TZG1msPmJU9whZtKAywZtKOre6p2xMJ2y0gE4TG3OE,1593
|
|
18
|
+
archae/util/file_tracker.py,sha256=z8uiM0T-DwdiOK6L-o3KS89f9iuwlxBtDd5cQyHV8gU,3416
|
|
19
|
+
archae/util/lists.py,sha256=ID7Md4aUSJgcESLtSeNv2SUyZpJza9mSIOT-UK4DBLU,1151
|
|
20
|
+
archae/util/tool_manager.py,sha256=HU2xkmb_18XF5SFwFV5gUUcPddZVBYRdGB2PSWgWaqA,3974
|
|
21
|
+
archae-2026.3.dist-info/WHEEL,sha256=5DEXXimM34_d4Gx1AuF9ysMr1_maoEtGKjaILM3s4w4,80
|
|
22
|
+
archae-2026.3.dist-info/entry_points.txt,sha256=gGL_R78QELaTeyFGb-OuSnRuu4EUdT68EKmyrFno59o,48
|
|
23
|
+
archae-2026.3.dist-info/METADATA,sha256=1aXBaCJAUhintVrj4L3IiTEt2Qei1qp52889DnJ7zJ8,8051
|
|
24
|
+
archae-2026.3.dist-info/RECORD,,
|
|
@@ -1,23 +0,0 @@
|
|
|
1
|
-
archae/__init__.py,sha256=O_HNvpNVsJ5LZPNPLvuHhHdOq0b6M0LnWqycB505mkQ,111
|
|
2
|
-
archae/__main__.py,sha256=l3eO5dEs1cR_hDziOpnW1PYzoRL2siYm81wgvftyigg,172
|
|
3
|
-
archae/cli.py,sha256=ZYq7joe10y73dCwQY7666m-SY2dej_GVEmoYRyFpg7A,5739
|
|
4
|
-
archae/config.py,sha256=zGOwkPDfNLR-06ziZMXeLBamd-98AbNU0aTuoH7ibKQ,2952
|
|
5
|
-
archae/default_settings.toml,sha256=svBdN9QJm8UBBz6AEWpBp-gslSBcE5n00xZdl32KDEc,230
|
|
6
|
-
archae/extractor.py,sha256=CfYtsxuoXQ2rOLQIByJvLI9Y61U338dZoeb7ZBWAc_g,9948
|
|
7
|
-
archae/options.yaml,sha256=nJo7gOCuiS_fKd9C3iRwI3Eb_f4OT6mbdVkipqdISpk,937
|
|
8
|
-
archae/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
9
|
-
archae/util/__init__.py,sha256=HlENK1XfnwnYMrlkvVJTjs-6W7RLLUJOoyFcOb7njUE,34
|
|
10
|
-
archae/util/archiver/__init__.py,sha256=baayr_wh2MfRaKKiVuaUxtYJcMzKR82Uf1amcU7QtzM,202
|
|
11
|
-
archae/util/archiver/base_archiver.py,sha256=xEHynzmhwZdMl2fEDj9h-YaH9ZQEeaugF4H_ILoOxTY,1629
|
|
12
|
-
archae/util/archiver/peazip.py,sha256=XdhI2EQ3A9EH89KxPv-FcmjukwWcS96WtG-gwkha6hs,4022
|
|
13
|
-
archae/util/archiver/seven_zip.py,sha256=eck_cXMGFqwJ05ZKge3N0Ozy1XkKuGyE09s3jFcsMrQ,4951
|
|
14
|
-
archae/util/archiver/unar.py,sha256=xWTR7cpFXyofvtPI-7Z5Ski605WmJdLoAm6a81IriAc,3974
|
|
15
|
-
archae/util/converter/file_size.py,sha256=BdcYzpdvCDq3YH1eYW1-zwnQCxv9mBzG4DuqHY2dt6o,1984
|
|
16
|
-
archae/util/enum/__init__.py,sha256=IvjtVopATKLAHDjOpblaExy2yXwIzweX0HoUrQWcpkM,109
|
|
17
|
-
archae/util/enum/byte_scale.py,sha256=5TZG1msPmJU9whZtKAywZtKOre6p2xMJ2y0gE4TG3OE,1593
|
|
18
|
-
archae/util/file_tracker.py,sha256=HQb1l7j_Jy0qO6tqkB8jXM6P2o5fLB6Ih0J5U4JUyT8,3106
|
|
19
|
-
archae/util/tool_manager.py,sha256=HU2xkmb_18XF5SFwFV5gUUcPddZVBYRdGB2PSWgWaqA,3974
|
|
20
|
-
archae-2026.2.0b1.dist-info/WHEEL,sha256=fAguSjoiATBe7TNBkJwOjyL1Tt4wwiaQGtNtjRPNMQA,80
|
|
21
|
-
archae-2026.2.0b1.dist-info/entry_points.txt,sha256=gGL_R78QELaTeyFGb-OuSnRuu4EUdT68EKmyrFno59o,48
|
|
22
|
-
archae-2026.2.0b1.dist-info/METADATA,sha256=NiFftqzt93bRJoLbDw7jpWo5-Xt8JclNlSk8SezrroE,7976
|
|
23
|
-
archae-2026.2.0b1.dist-info/RECORD,,
|
|
File without changes
|