archae 2026.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
archae/__init__.py ADDED
@@ -0,0 +1 @@
1
+ """Archae explodes archives."""
archae/__main__.py ADDED
@@ -0,0 +1,9 @@
1
+ """archae as a module entry point.
2
+
3
+ This allows archae to be executable from a git checkout or zip archive.
4
+ """
5
+
6
+ from .cli import cli
7
+
8
+ if __name__ == "__main__":
9
+ cli()
archae/cli.py ADDED
@@ -0,0 +1,374 @@
1
+ """Main CLI for archae."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import copy
6
+ import hashlib
7
+ import re
8
+ import shutil
9
+ from importlib import metadata
10
+ from pathlib import Path
11
+ from typing import TYPE_CHECKING, Any
12
+
13
+ import magic
14
+ import rich_click as click
15
+
16
+ import archae.util.archiver
17
+ from archae.util.enum import ByteScale
18
+
19
+ if TYPE_CHECKING:
20
+ from archae.util.archiver.base_archiver import BaseArchiver
21
+
22
+ tools: dict[str, BaseArchiver] = {}
23
+
24
+
25
class FileSizeParamType(click.ParamType):
    """Click parameter type for byte counts written as ``512``, ``10G`` or ``1.5MB``.

    Units are binary (1 K = 1024 bytes). ``compact_value`` and ``expand_value``
    are static so they can also be used outside of click, e.g. to build the
    module-level defaults.
    """

    name = "filesize"

    # Multiplier for each accepted prefix letter. "P" is included because
    # compact_value can emit it, so every compacted string parses back.
    _UNIT_MULTIPLIERS = {
        "K": 1024,
        "M": 1024**2,
        "G": 1024**3,
        "T": 1024**4,
        "P": 1024**5,
    }
    # <number>[whitespace]<prefix letter>[B], case-insensitive.
    _SIZE_PATTERN = re.compile(r"^(\d+(?:\.\d+)?)\s*([KMGTP])B?$", re.IGNORECASE)

    @staticmethod
    def compact_value(value: float) -> str:
        """Convert a byte count to the shortest exact FileSizeParam string.

        The value is divided by 1024 for as long as the division is exact, up
        to the PETA scale, and the matching prefix letter is appended.

        Args:
            value (float): The size in bytes to convert.

        Returns:
            str: The collapsed representation, e.g. ``"10G"`` for ``10 * 1024**3``.
                Zero is returned as ``"0"``; a fractional value is returned
                unscaled because no exact collapse exists.

        """
        remaining = value
        if remaining == int(remaining):
            # Switch to exact integer arithmetic; float division loses precision
            # on very large byte counts.
            remaining = int(remaining)

        exponent = 0
        max_exponent = int(ByteScale.PETA.value)
        # The ``remaining`` guard stops zero from being "divided" all the way
        # up to the largest scale (which used to produce "0P").
        while remaining and exponent < max_exponent and remaining % 1024 == 0:
            remaining //= 1024
            exponent += 1
        return f"{remaining}{ByteScale(exponent).prefix_letter}"  # type: ignore[call-arg]

    @staticmethod
    def expand_value(value: str | int) -> int:
        """Convert a FileSizeParam string or int to a number of bytes.

        Args:
            value (str | int): A plain integer, or a number followed by one of
                the prefixes K, M, G, T, P (optionally followed by ``B``).

        Returns:
            int: Size in bytes (fractional results are truncated).

        Raises:
            ValueError: If the value is neither an integer nor a recognised
                ``<number><unit>`` string.

        """
        try:
            return int(value)
        except ValueError:
            pass  # Not a bare integer; fall through to "<number><unit>" parsing.

        match = FileSizeParamType._SIZE_PATTERN.match(str(value).strip())
        if not match:
            msg = f"{value} is not a valid file size (e.g., 10G, 500M)"
            raise ValueError(msg)

        number, unit = match.groups()
        multiplier = FileSizeParamType._UNIT_MULTIPLIERS[unit.upper()]
        return int(float(number) * multiplier)

    def convert(
        self,
        value: Any,
        param: click.Parameter | None,
        ctx: click.Context | None,
    ) -> int:
        """Convert a raw command-line value to a size in bytes.

        Args:
            value (Any): The raw value handed over by click.
            param (click.Parameter | None): The parameter being validated.
            ctx (click.Context | None): The active click context.

        Returns:
            int: Size in bytes.

        """
        try:
            return self.expand_value(value)
        except ValueError as err:
            # Report the parse error through click's own failure mechanism.
            self.fail(str(err), param, ctx)
        return 0
107
+
108
+
109
# Built-in limits used as the defaults of the CLI options of the same names.
defaults = {
    # Byte limits are written in human-readable form and expanded to integers.
    "max_total_size_bytes": FileSizeParamType.expand_value("100G"),
    "max_archive_size_bytes": FileSizeParamType.expand_value("10G"),
    "min_archive_ratio": 0.005,
}

# Effective settings for the current run; cli() overwrites these with the
# values given on the command line. Deep-copied so ``defaults`` stays pristine.
config = copy.deepcopy(defaults)
116
+
117
+
118
@click.command(
    context_settings={"help_option_names": ["-h", "--help"], "show_default": True}
)
@click.rich_config(
    help_config=click.RichHelpConfiguration(
        width=88,
        show_arguments=True,
        text_markup=True,
    ),
)
@click.argument(
    "archive_path",
    type=click.Path(exists=True, dir_okay=False),
    help="Archive to examine",
)
@click.option(
    "--max_total_size_bytes",
    type=FileSizeParamType(),
    default=defaults["max_total_size_bytes"],
    help=f"Maximum total extraction size before failing, default {FileSizeParamType.compact_value(defaults['max_total_size_bytes'])}",
)
@click.option(
    "--max_archive_size_bytes",
    type=FileSizeParamType(),
    default=defaults["max_archive_size_bytes"],
    help=f"Maximum individual archive extraction size before failing, default {FileSizeParamType.compact_value(defaults['max_archive_size_bytes'])}",
)
@click.option(
    "--min_archive_ratio",
    type=click.FloatRange(0, 1),
    default=defaults["min_archive_ratio"],
    help=f"Minimum allowed compression ratio for an archive. A floating-point value between 0.0 and 1.0, inclusive. Default is {defaults['min_archive_ratio']}",
)
@click.version_option(metadata.version("archae"), "-v", "--version")
def cli(
    archive_path: str,
    max_total_size_bytes: int,
    max_archive_size_bytes: int,
    min_archive_ratio: float,
) -> None:
    """Archae explodes archives."""
    # NOTE: the docstring above doubles as the command's --help description,
    # so it is intentionally kept to that single sentence.

    # Discover which external extraction tools are installed.
    locate_tools()

    # Publish the parsed options through the module-level ``config`` mapping,
    # which is where handle_file() reads its limits from.
    config.update(
        max_total_size_bytes=max_total_size_bytes,
        max_archive_size_bytes=max_archive_size_bytes,
        min_archive_ratio=min_archive_ratio,
    )

    # Examine the archive (recursing into its contents), then dump the results.
    handle_file(Path(archive_path))
    debug_print_tracked_files()
165
+
166
+
167
# Registry of every examined file, keyed by SHA-256 hex digest. Each record
# holds "size", "metadata" and (once a path is tracked) "paths".
tracked_files: dict[str, dict] = {}
# Extraction output goes to "<current working directory>/extracted".
base_dir = Path.cwd()
extract_dir = base_dir / "extracted"
# NOTE(review): these statements run at import time, so merely importing this
# module deletes any existing ./extracted directory and recreates it empty.
if extract_dir.exists() and extract_dir.is_dir():
    shutil.rmtree(extract_dir)
extract_dir.mkdir(exist_ok=True)
173
+
174
+
175
def locate_tools() -> None:
    """Register every archiver whose executable can be found.

    Each direct subclass of ``BaseArchiver`` is looked up with
    ``shutil.which``; when its executable is found, an instance bound to the
    resolved path is stored in ``tools`` under the archiver's name.
    """
    for archiver_cls in archae.util.archiver.BaseArchiver.__subclasses__():
        resolved = shutil.which(str(archiver_cls.executable_name))
        if resolved is None:
            # Tool not installed; leave it out of the registry.
            continue
        tools[str(archiver_cls.archiver_name)] = archiver_cls(resolved)  # type: ignore[abstract]
181
+
182
+
183
def handle_file(file_path: Path) -> None:
    """Examine a file and, if it is an acceptable archive, recurse into it.

    The file is hashed and registered in ``tracked_files`` together with its
    libmagic type, MIME type and extension. When a located tool claims the
    file, the archive is extracted to ``extract_dir / <hash>`` and every
    extracted file is examined in turn, unless one of these guards trips:

    * the tool cannot report a positive extracted size,
    * the extracted size exceeds ``config["max_archive_size_bytes"]``,
    * the compression ratio (compressed size / extracted size) is below
      ``config["min_archive_ratio"]``,
    * identical content has already been extracted during this run.

    Args:
        file_path (Path): The path to the file.
    """
    click.echo(f"Starting examination of file: {file_path!s}")

    base_hash = sha256_hash_file(file_path)
    file_size_bytes = file_path.stat().st_size
    track_file(base_hash, file_size_bytes)
    track_file_path(base_hash, file_path)
    add_metadata_to_hash(base_hash, "type", magic.from_file(file_path))
    add_metadata_to_hash(base_hash, "type_mime", magic.from_file(file_path, mime=True))
    extension = file_path.suffix.lstrip(".").lower()
    add_metadata_to_hash(base_hash, "extension", extension)

    is_file_archive = is_archive(base_hash)
    add_metadata_to_hash(base_hash, "is_archive", is_file_archive)
    if not is_file_archive:
        return

    archiver = get_archiver_for_file(base_hash)
    if archiver is None:
        click.echo(f"No suitable archiver found for file: {file_path!s}")
        return

    extracted_size = archiver.get_archive_uncompressed_size(file_path)
    add_metadata_to_hash(base_hash, "extracted_size", extracted_size)
    if extracted_size <= 0:
        # Some archivers return -1 because they cannot report a size. Without
        # a size neither limit below can be checked, so stay on the safe side
        # and say why, instead of reporting a meaningless negative ratio.
        click.echo(
            f"Skipped archive {file_path} because its extracted size could not be determined (reported {extracted_size})"
        )
        return

    # Compressed size relative to extracted size: a value in (0, 1] for a
    # typical archive, approaching 0 for archive bombs. This is the quantity
    # that the 0..1 ``min_archive_ratio`` option bounds from below.
    compression_ratio = file_size_bytes / extracted_size
    add_metadata_to_hash(base_hash, "overall_compression_ratio", compression_ratio)

    if extracted_size > config["max_archive_size_bytes"]:
        click.echo(
            f"Skipped archive {file_path} because expected size {extracted_size} is greater than max_archive_size_bytes {config['max_archive_size_bytes']}"
        )
        return
    if compression_ratio < config["min_archive_ratio"]:
        click.echo(
            f"Skipped archive {file_path} because compression ratio {compression_ratio:.5f} is less than min_archive_ratio {config['min_archive_ratio']}"
        )
        return

    extraction_dir = extract_dir / base_hash
    if extraction_dir.exists():
        # Same content hash => same extraction directory. Extracting again
        # would only redo work and, for an archive that contains a copy of
        # itself, would recurse without bound.
        click.echo(
            f"Skipped archive {file_path} because identical content was already extracted to {extraction_dir}"
        )
        return

    # NOTE(review): config["max_total_size_bytes"] is not enforced anywhere in
    # this function; only the per-archive limits above are checked.
    archiver.extract_archive(file_path, extraction_dir)
    for child_file in list_child_files(extraction_dir):
        handle_file(child_file)
226
+
227
+
228
def is_archive(hash: str) -> bool:
    """Report whether any located tool claims the tracked file as an archive.

    This is the boolean view of ``get_archiver_for_file``; delegating keeps
    the MIME-type/extension matching rules in a single place.

    Args:
        hash (str): The hash of the file.

    Returns:
        bool: True if the file is an archive, otherwise False.

    """
    return get_archiver_for_file(hash) is not None
247
+
248
+
249
def get_archiver_for_file(hash: str) -> BaseArchiver | None:
    """Pick the first located tool that claims a tracked file.

    A tool claims the file when the file's recorded MIME type is among the
    tool's ``mime_types`` or its recorded extension is among the tool's
    ``file_extensions``. Tools are tried in the iteration order of ``tools``.

    Args:
        hash (str): The hash of the file.

    Returns:
        BaseArchiver | None: The matching archiver instance, or None if no
            located tool claims the file.
    """
    file_metadata = get_tracked_file_metadata(hash)
    mime_type = file_metadata.get("type_mime", "").lower()
    extension = file_metadata.get("extension", "").lower()

    return next(
        (
            candidate
            for candidate in tools.values()
            if mime_type in candidate.mime_types
            or extension in candidate.file_extensions
        ),
        None,
    )
266
+
267
+
268
def list_child_files(directory_path: Path, pattern: str = "*") -> list[Path]:
    """Collect the files below a directory whose names match a glob pattern.

    Args:
        directory_path (Path): The directory to search, including all of its
            subdirectories.
        pattern (str): The glob pattern to match (e.g., '*.txt', '*.py').

    Returns:
        list: Path objects for the matching files; directories that match the
            pattern are left out.
    """
    # rglob descends into subdirectories; is_file() drops the directories that
    # a broad pattern such as '*' also matches.
    return [entry for entry in directory_path.rglob(pattern) if entry.is_file()]
282
+
283
+
284
+ def sha256_hash_file(file_path: Path) -> str:
285
+ """Computes the SHA-256 hash of a file.
286
+
287
+ Args:
288
+ file_path (Path): The path to the file.
289
+
290
+ Returns:
291
+ str: The SHA-256 hash of the file in hexadecimal format.
292
+ """
293
+ try:
294
+ with file_path.open("rb") as f:
295
+ # Use hashlib.file_digest for simplicity and efficiency in Python 3.11+
296
+ digest = hashlib.file_digest(f, "sha256")
297
+ return digest.hexdigest()
298
+ except FileNotFoundError:
299
+ return "Error: File not found"
300
+
301
+
302
def debug_print_tracked_files() -> None:
    """Dump every tracked file (hash, size, paths, metadata) via click.echo."""
    click.echo("------------------------------------------------")
    for file_hash, record in tracked_files.items():
        size = record.get("size", "Unknown")
        click.echo(f"Hash: {file_hash}")
        click.echo(f"  Size: {size} bytes")

        # One line per location at which this content was seen.
        for known_path in record.get("paths", []):
            click.echo(f"  Path: {known_path}")

        click.echo("  Metadata:")
        for key, value in record.get("metadata", {}).items():
            click.echo(f"    {key}: {value}")
313
+
314
+
315
def track_file(hash: str, file_size_bytes: int) -> None:
    """Register a file under its hash, or verify an existing registration.

    Args:
        hash (str): The hash of the file to track.
        file_size_bytes (int): The size of the file in bytes.

    Raises:
        RuntimeError: If the hash is already tracked with a different size.
    """
    if hash not in tracked_files:
        # First sighting: start a record with the size and empty metadata.
        tracked_files[hash] = {}
        record = tracked_files[hash]
        record["size"] = file_size_bytes
        record["metadata"] = {}
        return

    if tracked_files[hash]["size"] != file_size_bytes:
        # Same digest but a different length cannot be the same content.
        msg = f"Hash collision detected for hash {hash} with differing sizes."
        raise RuntimeError(msg)
329
+
330
+
331
def is_file_tracked(hash: str) -> bool:
    """Check if a file is tracked by its hash.

    Args:
        hash (str): The hash of the file to check.

    Returns:
        bool: True if ``tracked_files`` has an entry for the hash.
    """
    return hash in tracked_files
338
+
339
+
340
def get_tracked_file_metadata(hash: str) -> dict:
    """Return a private copy of the metadata recorded for a hash.

    Args:
        hash (str): The hash of the file.

    Returns:
        dict: A deep copy of the file's metadata, so callers cannot alter the
            tracked record through it. Empty if the hash is not tracked or
            has no metadata.
    """
    record = tracked_files.get(hash, {})
    stored_metadata = record.get("metadata", {})
    return copy.deepcopy(stored_metadata)
350
+
351
+
352
def track_file_path(hash: str, file_path: Path) -> None:
    """Record a location at which the content with this hash was found.

    The hash must already be tracked (see ``track_file``); a path that is
    already recorded for the hash is not added a second time.

    Args:
        hash (str): The hash of the file.
        file_path (Path): The path to track.
    """
    # setdefault creates the "paths" list on first use and returns it after.
    known_paths = tracked_files[hash].setdefault("paths", [])
    if file_path not in known_paths:
        known_paths.append(file_path)
364
+
365
+
366
def add_metadata_to_hash(hash: str, key: str, value: Any) -> None:
    """Add metadata to a tracked file.

    An existing value stored under the same key is overwritten.

    Args:
        hash (str): The hash of the file.
        key (str): The metadata key.
        value (Any): The metadata value.

    Raises:
        KeyError: If the hash has not been registered in ``tracked_files``.
    """
    tracked_files[hash]["metadata"][key] = value
archae/py.typed ADDED
File without changes
@@ -0,0 +1 @@
1
+ """Utility modules for archae."""
@@ -0,0 +1,6 @@
1
+ """Archiver utilities for extracting archive files."""
2
+
3
+ from .base_archiver import BaseArchiver
4
+ from .peazip import PeazipArchiver
5
+ from .seven_zip import SevenZipArchiver
6
+ from .unar import UnarArchiver
@@ -0,0 +1,55 @@
1
+ """Base archiver class for extraction tools."""
2
+
3
+ from abc import ABC, abstractmethod
4
+ from pathlib import Path
5
+
6
+
7
+ class BaseArchiver(ABC):
8
+ """Base class for archiver/extractor tools."""
9
+
10
+ @abstractmethod
11
+ def __init__(self, executable_path: str | Path) -> None:
12
+ """Initialize the archiver.
13
+
14
+ Args:
15
+ executable_path: Path to the executable.
16
+ """
17
+
18
+ @property
19
+ def archiver_name(self) -> str:
20
+ """Get the archiver name."""
21
+ return self.archiver_name
22
+
23
+ @property
24
+ def executable_name(self) -> str:
25
+ """Get the executable name."""
26
+ return self.executable_name
27
+
28
+ @property
29
+ def file_extensions(self) -> list[str]:
30
+ """A non-abstract method that accesses the class impl for the file extensions."""
31
+ return self.file_extensions
32
+
33
+ @property
34
+ def mime_types(self) -> list[str]:
35
+ """A non-abstract method that accesses the class impl for the mime types."""
36
+ return self.mime_types
37
+
38
+ @abstractmethod
39
+ def extract_archive(self, archive_path: Path, extract_dir: Path) -> None:
40
+ """Extracts an archive to a specified directory.
41
+
42
+ Args:
43
+ archive_path (Path): The path to the archive file.
44
+ extract_dir (Path): The directory to extract the archive to.
45
+
46
+ """
47
+
48
+ @abstractmethod
49
+ def get_archive_uncompressed_size(self, archive_path: Path) -> int:
50
+ """Get the uncompressed size of the contents.
51
+
52
+ Args:
53
+ archive_path (Path): The path to the archive file.
54
+
55
+ """
@@ -0,0 +1,159 @@
1
+ """peazip archiver/extractor implementation."""
2
+
3
+ import subprocess
4
+ from pathlib import Path
5
+ from typing import ClassVar
6
+
7
+ from .base_archiver import BaseArchiver
8
+
9
+
10
class PeazipArchiver(BaseArchiver):
    """Archiver implementation for peazip."""

    # Lower-case extensions without the leading dot. Entries are compared
    # verbatim against a file's extension, so they must not carry stray
    # whitespace ("pptm " could never match) and need not be repeated.
    file_extensions: ClassVar[list[str]] = [
        "appinstaller",
        "appx",
        "appxbundle",
        "gz",
        "tgz",
        "jar",
        "ear",
        "war",
        "emsix",
        "emsixbundle",
        "msix",
        "msixbundle",
        "apk",
        "deb",
        "cab",
        "chm",
        "chw",
        "chi",
        "chq",
        "pptx",
        "pptm",
        "xlsx",
        "xlsm",
        "docx",
        "docm",
        "7z",
        "s7z",
        "ace",
        "dmg",
        "img",
        "arc",
        "pak",
        "arj",
        "br",
        "bz2",
        "tbz2",
        "crx",
        "z",
        "taz",
        "cpio",
        "iso",
        "lzma",
        "wim",
        "swm",
        "esd",
        "msi",
        "msp",
        "rar",
        "r00",
        "rpm",
        "tar",
        "vhd",
        "vhdx",
        "xar",
        "pkg",
        "xpi",
        "xz",
        "txz",
        "ipa",
        "zip",
        "zipx",
        "aar",
        "zst",
    ]
    # MIME types claimed by this tool (duplicates removed).
    mime_types: ClassVar[list[str]] = [
        "application/appinstaller",
        "application/appx",
        "application/appxbundle",
        "application/gzip",
        "application/java-archive",
        "application/msix",
        "application/msixbundle",
        "application/vnd.android.package-archive",
        "application/vnd.debian.binary-package",
        "application/vnd.ms-cab-compressed",
        "application/vnd.ms-htmlhelp",
        "application/vnd.openxmlformats-officedocument.presentationml.presentation",
        "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
        "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
        "application/x-7z-compressed",
        "application/x-ace-compressed",
        "application/x-apple-diskimage",
        "application/x-arc",
        "application/x-arj",
        "application/x-brotli",
        "application/x-bzip2",
        "application/x-chrome-extension",
        "application/x-compress",
        "application/x-cpio",
        "application/x-freearc",
        "application/x-iso9660-image",
        "application/x-lzma",
        "application/x-ms-wim",
        "application/x-ole-storage",
        "application/x-rar-compressed",
        "application/x-rpm",
        "application/x-tar",
        "application/x-vhd",
        "application/x-xar",
        "application/x-xpinstall",
        "application/x-xz",
        "application/zip",
        "application/zstd",
    ]
    archiver_name: str = "peazip"
    executable_name: str = "pea"

    def __init__(self, executable_path: str | Path) -> None:
        """Initialize the peazip archiver.

        Args:
            executable_path: Path to the peazip executable.
        """
        self.executable_path = Path(executable_path)

    def extract_archive(self, archive_path: Path, extract_dir: Path) -> None:
        """Extracts an archive to a specified directory.

        Runs the executable with the ``-ext2simple`` switch followed by the
        archive path and the target directory.

        Args:
            archive_path (Path): The path to the archive file.
            extract_dir (Path): The directory to extract the archive to.

        Raises:
            subprocess.CalledProcessError: If the tool exits with a non-zero
                status.

        """
        command: list[str] = [
            str(self.executable_path),
            "-ext2simple",
            str(archive_path),
            str(extract_dir),
        ]
        subprocess.run(command, check=True)  # noqa: S603

    def get_archive_uncompressed_size(self, archive_path: Path) -> int:  # noqa: ARG002
        """Get the uncompressed size of the contents.

        Size reporting is not implemented for this tool; the archive is not
        inspected.

        Args:
            archive_path (Path): The path to the archive file (unused).

        Returns:
            int: Always -1, meaning the size is unknown.
        """
        return -1
@@ -0,0 +1,199 @@
1
+ """7zip archiver/extractor implementation."""
2
+
3
+ import subprocess
4
+ from pathlib import Path
5
+ from typing import ClassVar
6
+
7
+ from .base_archiver import BaseArchiver
8
+
9
+
10
class SevenZipArchiver(BaseArchiver):
    """Archiver implementation for 7zip."""

    # Lower-case extensions without the leading dot. Entries are compared
    # verbatim against a file's extension, so they must not carry stray
    # whitespace ("dmg " could never match) and need not be repeated.
    file_extensions: ClassVar[list[str]] = [
        "7z",
        "s7z",
        "apk",
        "bz2",
        "tbz2",
        "crx",
        "xpi",
        "deb",
        "gz",
        "tgz",
        "ipa",
        "jar",
        "ear",
        "war",
        "lzma",
        "cab",
        "docx",
        "docm",
        "pptx",
        "pptm",
        "xlsx",
        "xlsm",
        "emsix",
        "emsixbundle",
        "msix",
        "appinstaller",
        "appx",
        "appxbundle",
        "msixbundle",
        "z",
        "taz",
        "tar",
        "zip",
        "zipx",
        "appimage",
        "dmg",
        "img",
        "arj",
        "cpio",
        "cramfs",
        "raw",
        "alz",
        "ext",
        "ext2",
        "ext3",
        "ext4",
        "xar",
        "pkg",
        "fat",
        "gpt",
        "hfs",
        "hfsx",
        "iso",
        "lha",
        "lhz",
        "mbr",
        "chm",
        "chw",
        "chi",
        "chq",
        "msi",
        "msp",
        "vhd",
        "vhdx",
        "ntfs",
        "nsi",
        "exe",
        "nsis",
        "qcow2",
        "qcow",
        "qcow2c",
        "rpm",
        "rar",
        "r00",
        "sqfs",
        "sfs",
        "sqsh",
        "squashfs",
        "scap",
        "uefif",
        "udf",
        "edb",
        "edp",
        "edr",
        "a",
        "ar",
        "lib",
        "vdi",
        "vmdk",
        "wim",
        "swm",
        "esd",
        "xz",
        "txz",
    ]
    # MIME types claimed by this tool.
    mime_types: ClassVar[list[str]] = [
        "application/x-7z-compressed",
        "application/vnd.android.package-archive",
        "application/x-bzip2",
        "application/x-chrome-extension",
        "application/x-xpinstall",
        "application/vnd.debian.binary-package",
        "application/gzip",
        "application/java-archive",
        "application/x-lzma",
        "application/vnd.ms-cab-compressed",
        "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
        "application/vnd.openxmlformats-officedocument.presentationml.presentation",
        "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
        "application/msix",
        "application/appinstaller",
        "application/appx",
        "application/appxbundle",
        "application/msixbundle",
        "application/x-compress",
        "application/x-tar",
        "application/zip",
        "application/x-apple-diskimage",
        "application/x-arj",
        "application/x-cpio",
        "application/vnd.efi.img",
        "application/x-alz-compressed",
        "application/x-xar",
        "application/x-iso9660-image",
        "application/x-lzh",
        "application/vnd.ms-htmlhelp",
        "application/x-ole-storage",
        "application/x-vhd",
        "text/x-nsis",
        "application/x-qemu-disk",
        "application/x-rpm",
        "application/x-rar-compressed",
        "application/vnd.squashfs",
        "application/x-archive",
        "application/x-virtualbox-vdi",
        "application/x-vmdk-disk",
        "application/x-ms-wim",
        "application/x-xz",
    ]
    archiver_name: str = "7zip"
    executable_name: str = "7z"

    def __init__(self, executable_path: str | Path) -> None:
        """Initialize the 7zip archiver.

        Args:
            executable_path: Path to the 7zip executable.
        """
        self.executable_path = Path(executable_path)

    def extract_archive(self, archive_path: Path, extract_dir: Path) -> None:
        """Extracts an archive to a specified directory.

        Runs ``<executable> x <archive> -o<extract_dir>``.

        Args:
            archive_path (Path): The path to the archive file.
            extract_dir (Path): The directory to extract the archive to.

        Raises:
            subprocess.CalledProcessError: If the tool exits with a non-zero
                status.

        """
        command: list[str] = [
            str(self.executable_path),
            "x",
            str(archive_path),
            f"-o{extract_dir!s}",
        ]
        subprocess.run(command, check=True)  # noqa: S603

    def get_archive_uncompressed_size(self, archive_path: Path) -> int:
        """Get the uncompressed size of the contents.

        Runs ``<executable> l -slt <archive>`` and adds up the value of every
        output line that starts with ``Size = ``.

        Args:
            archive_path (Path): The path to the archive file.

        Returns:
            int: The summed size of the listed entries. Entries whose size
                field is blank or not a plain number contribute nothing, so
                the result is 0 when no entry reports a size.

        Raises:
            subprocess.CalledProcessError: If the tool exits with a non-zero
                status.
        """
        size_prefix = "Size = "
        command: list[str] = [str(self.executable_path), "l", "-slt", str(archive_path)]
        result = subprocess.run(command, check=True, capture_output=True, text=True)  # noqa: S603

        exploded_size = 0
        for line in str(result.stdout).splitlines():
            if not line.startswith(size_prefix):
                continue
            raw_size = line[len(size_prefix) :].strip()
            # A listing can carry an empty size field; int("") would raise.
            if raw_size.isdigit():
                exploded_size += int(raw_size)

        return exploded_size
@@ -0,0 +1,158 @@
1
+ """unar archiver/extractor implementation."""
2
+
3
+ import subprocess
4
+ from pathlib import Path
5
+ from typing import ClassVar
6
+
7
+ from .base_archiver import BaseArchiver
8
+
9
+
10
class UnarArchiver(BaseArchiver):
    """Archiver implementation for unar."""

    # Lower-case extensions without the leading dot. Entries are compared
    # verbatim against a file's extension, so they must not carry stray
    # whitespace ("pptm " / "xlsx " could never match) and need not be repeated.
    file_extensions: ClassVar[list[str]] = [
        "appinstaller",
        "appx",
        "appxbundle",
        "gz",
        "tgz",
        "emsix",
        "emsixbundle",
        "msix",
        "msixbundle",
        "apk",
        "deb",
        "cab",
        "pptx",
        "pptm",
        "xlsx",
        "xlsm",
        "docx",
        "docm",
        "7z",
        "s7z",
        "ace",
        "alz",
        "arc",
        "pak",
        "a",
        "ar",
        "lib",
        "arj",
        "bz2",
        "tbz2",
        "crx",
        "z",
        "taz",
        "cpio",
        "iso",
        "img",
        "lha",
        "lhz",
        "lzma",
        "msi",
        "msp",
        "rar",
        "r00",
        "sit",
        "sitx",
        "tar",
        "xar",
        "pkg",
        "xpi",
        "xz",
        "txz",
        "zoo",
        "zip",
        "zipx",
        "aar",
        "nsi",
        "exe",
        "nsis",
        "udf",
        "edb",
        "edp",
        "edr",
    ]
    # MIME types claimed by this tool (duplicates removed).
    mime_types: ClassVar[list[str]] = [
        "application/appinstaller",
        "application/appx",
        "application/appxbundle",
        "application/gzip",
        "application/msix",
        "application/msixbundle",
        "application/vnd.android.package-archive",
        "application/vnd.debian.binary-package",
        "application/vnd.ms-cab-compressed",
        "application/vnd.openxmlformats-officedocument.presentationml.presentation",
        "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
        "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
        "application/x-7z-compressed",
        "application/x-ace-compressed",
        "application/x-alz-compressed",
        "application/x-arc",
        "application/x-archive",
        "application/x-arj",
        "application/x-bzip2",
        "application/x-chrome-extension",
        "application/x-compress",
        "application/x-cpio",
        "application/x-freearc",
        "application/x-iso9660-image",
        "application/x-lzh",
        "application/x-lzma",
        "application/x-ole-storage",
        "application/x-rar-compressed",
        "application/x-stuffit",
        "application/x-sit",
        "application/x-stuffitx",
        "application/x-sitx",
        "application/x-tar",
        "application/x-xar",
        "application/x-xpinstall",
        "application/x-xz",
        "application/x-zoo",
        "application/zip",
        "text/x-nsis",
    ]
    archiver_name: str = "unar"
    executable_name: str = "unar"

    def __init__(self, executable_path: str | Path) -> None:
        """Initialize the unar archiver.

        Args:
            executable_path: Path to the unar executable.
        """
        self.executable_path = Path(executable_path)

    def extract_archive(self, archive_path: Path, extract_dir: Path) -> None:
        """Extracts an archive to a specified directory.

        Runs ``<executable> -o <extract_dir> <archive>``.

        Args:
            archive_path (Path): The path to the archive file.
            extract_dir (Path): The directory to extract the archive to.

        Raises:
            subprocess.CalledProcessError: If the tool exits with a non-zero
                status.

        """
        command: list[str] = [
            str(self.executable_path),
            "-o",
            str(extract_dir),
            str(archive_path),
        ]
        subprocess.run(command, check=True)  # noqa: S603

    def get_archive_uncompressed_size(self, archive_path: Path) -> int:  # noqa: ARG002
        """Get the uncompressed size of the contents.

        Size reporting is not implemented for this tool; the archive is not
        inspected.

        Args:
            archive_path (Path): The path to the archive file (unused).

        Returns:
            int: Always -1, meaning the size is unknown.
        """
        return -1
@@ -0,0 +1,5 @@
1
+ """Enum utilities for archae."""
2
+
3
+ from archae.util.enum.byte_scale import ByteScale
4
+
5
+ __all__ = ["ByteScale"]
@@ -0,0 +1,46 @@
1
+ """Byte scale enum for file size operations."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from enum import Enum
6
+ from typing import Self
7
+
8
+
9
+ class ByteScale(Enum):
10
+ """Byte scale prefix converter."""
11
+
12
+ NONE = (0, "")
13
+ KILO = (1, "K")
14
+ MEGA = (2, "M")
15
+ GIGA = (3, "G")
16
+ TERA = (4, "T")
17
+ PETA = (5, "P")
18
+
19
+ def __new__(cls, exponent: int, prefix_letter: str) -> Self:
20
+ """Apply values to the new Enum.
21
+
22
+ __new__ is used to control how new enum members are instantiated.
23
+ It must set the `_value_` attribute and any custom attributes.
24
+
25
+ Args:
26
+ exponent (int): the exponent value for the scale
27
+ prefix_letter (str): the prefix letter for the scale
28
+
29
+ Returns:
30
+ ByteScale: A new ByteScale enum.
31
+
32
+ """
33
+ obj = object.__new__(cls)
34
+ obj._value_ = exponent
35
+ obj.prefix_letter = prefix_letter
36
+ return obj
37
+
38
+ @property
39
+ def prefix_letter(self) -> str:
40
+ """Return the prefix letter for this scale."""
41
+ return self._prefix_letter
42
+
43
+ @prefix_letter.setter
44
+ def prefix_letter(self, value: str) -> None:
45
+ """Setter for prefix letter."""
46
+ self._prefix_letter = value
@@ -0,0 +1,147 @@
1
+ Metadata-Version: 2.3
2
+ Name: archae
3
+ Version: 2026.1.0
4
+ Summary: Archae explodes archives.
5
+ Keywords:
6
+ Author: Shawn McNaughton
7
+ Author-email: Shawn McNaughton <shawngmc@gmail.com>
8
+ License: MIT License
9
+
10
+ Copyright © 2026 Shawn McNaughton
11
+
12
+ Permission is hereby granted, free of charge, to any person obtaining a copy
13
+ of this software and associated documentation files (the "Software"), to deal
14
+ in the Software without restriction, including without limitation the rights
15
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
16
+ copies of the Software, and to permit persons to whom the Software is
17
+ furnished to do so, subject to the following conditions:
18
+
19
+ The above copyright notice and this permission notice shall be included in all
20
+ copies or substantial portions of the Software.
21
+
22
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
23
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
24
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
25
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
26
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
27
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
28
+ SOFTWARE.
29
+ Classifier: Development Status :: 2 - Pre-Alpha
30
+ Classifier: Environment :: Console
31
+ Classifier: Intended Audience :: Developers
32
+ Classifier: License :: OSI Approved :: MIT License
33
+ Classifier: Programming Language :: Python
34
+ Classifier: Programming Language :: Python :: 3
35
+ Classifier: Programming Language :: Python :: 3.11
36
+ Classifier: Programming Language :: Python :: 3.12
37
+ Classifier: Programming Language :: Python :: 3.13
38
+ Classifier: Programming Language :: Python :: 3.14
39
+ Classifier: Programming Language :: Python :: 3 :: Only
40
+ Classifier: Typing :: Typed
41
+ Requires-Dist: click
42
+ Requires-Dist: python-magic
43
+ Requires-Dist: rich-click
44
+ Requires-Python: ~=3.11
45
+ Project-URL: Documentation, https://archae.readthedocs.io/en/stable/
46
+ Project-URL: Changelog, https://archae.readthedocs.io/en/stable/changelog.html
47
+ Project-URL: Source Code, https://github.com/shawngmc/archae
48
+ Project-URL: Issue Tracker, https://github.com/shawngmc/archae/issues
49
+ Description-Content-Type: text/markdown
50
+
51
+ <!-- start docs-include-index -->
52
+
53
+ # Archae
54
+
55
+ ![Archae Logo of a spider exploring a sarcophagus](./_static/archae_logo.png)
56
+
57
+ [![PyPI](https://img.shields.io/pypi/v/archae)](https://pypi.org/project/archae/)
58
+ [![Supported Python Versions](https://img.shields.io/pypi/pyversions/archae)](https://pypi.org/project/archae/)
59
+ [![pre-commit.ci status](https://results.pre-commit.ci/badge/github/shawngmc/archae/main.svg)](https://results.pre-commit.ci/latest/github/shawngmc/archae/main)
60
+ [![Test](https://github.com/shawngmc/archae/actions/workflows/test.yml/badge.svg)](https://github.com/shawngmc/archae/actions/workflows/test.yml)
61
+ [![Documentation Status](https://readthedocs.org/projects/archae/badge/?version=latest)](https://archae.readthedocs.io/en/latest/?badge=latest)
62
+ [![PyPI - License](https://img.shields.io/pypi/l/archae)](https://img.shields.io/pypi/l/archae)
63
+
64
+ Archae explodes archives.
65
+
66
+ <!-- end docs-include-index -->
67
+
68
+ ## Installation
69
+
70
+ <!-- start docs-include-installation -->
71
+
72
+ Archae is available on [PyPI](https://pypi.org/project/archae/). Install with [uv](https://docs.astral.sh/uv/) or your package manager of choice:
73
+
74
+ ```sh
75
+ uv tool install archae
76
+ ```
77
+
78
+ <!-- end docs-include-installation -->
79
+
80
+ ## Documentation
81
+
82
+ Check out the [Archae documentation](https://archae.readthedocs.io/en/stable/) for the [User's Guide](https://archae.readthedocs.io/en/stable/usage.html) and [CLI Reference](https://archae.readthedocs.io/en/stable/cli.html).
83
+
84
+ ## Usage
85
+
86
+ <!-- start docs-include-usage -->
87
+
88
+ Running `archae --help` or `python -m archae --help` shows a list of all of the available options and arguments:
89
+
90
+ <!-- [[[cog
91
+ import cog
92
+ from archae import cli
93
+ from click.testing import CliRunner
94
+ runner = CliRunner()
95
+ result = runner.invoke(cli.cli, ["--help"], terminal_width=88)
96
+ help = result.output.replace("Usage: cli", "Usage: archae")
97
+ cog.outl(f"\n```sh\narchae --help\n{help.rstrip()}\n```\n")
98
+ ]]] -->
99
+
100
+ ```sh
101
+ archae --help
102
+
103
+ Usage: archae [OPTIONS] ARCHIVE_PATH
104
+
105
+ Archae explodes archives.
106
+
107
+ ╭─ Arguments ──────────────────────────────────────────────────────────────────────────╮
108
+ │ * ARCHIVE_PATH FILE Archive to examine [required] │
109
+ ╰──────────────────────────────────────────────────────────────────────────────────────╯
110
+ ╭─ Options ────────────────────────────────────────────────────────────────────────────╮
111
+ │ --max_total_size_bytes FILESIZE Maximum total extraction size │
112
+ │ before failing, default 100G │
113
+ │ [default: 107374182400] │
114
+ │ --max_archive_size_bytes FILESIZE Maximum individual archive │
115
+ │ extraction size before failing, │
116
+ │ default 10G [default: │
117
+ │ 10737418240] │
118
+ │ --min_archive_ratio FLOAT RANGE [0<=x<=1] Minimum allowed compression │
119
+ │ ratio for an archive. A │
120
+ │ floating-point value between │
121
+ │ 0.0 and 1.0, inclusive. Default │
122
+ │ is 0.005 [default: 0.005] │
123
+ │ --version -v Show the version and exit. │
124
+ │ --help -h Show this message and exit. │
125
+ ╰──────────────────────────────────────────────────────────────────────────────────────╯
126
+ ```
127
+
128
+ <!-- [[[end]]] -->
129
+
130
+ <!-- end docs-include-usage -->
131
+
132
+ ## Features
133
+
134
+ - Uses 7z/peazip/unar (not 7za/7zr) to try to extract archives
135
+ - No substantial limit on the number of archive layers
136
+ - Identifies file types via libmagic
137
+ - Basic archive bomb protections
138
+
139
+ ## TODOs
140
+
141
+ - Protect against/detect archive bombs (malicious disk usage)
142
+ - Improve archive type detection
143
+ - Separate between extractable and non-extractable archive types
144
+ - Protect against/detect depth attacks (excessively nested archives)
145
+ - Detect password-protected archives
146
+ - Allow supplying archive passwords by hash
147
+ - Add custom magic to detect obscure archive formats
@@ -0,0 +1,16 @@
1
+ archae/__init__.py,sha256=tqGdU1tlaD9BIf9_PfhiMRLVsoqTTHr9JIJq7jht1ys,32
2
+ archae/__main__.py,sha256=l3eO5dEs1cR_hDziOpnW1PYzoRL2siYm81wgvftyigg,172
3
+ archae/cli.py,sha256=lmkoCa_G-OV1UvdvvpNwbwgr9c1LdZNfEsZjrGXvAOY,11949
4
+ archae/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
5
+ archae/util/__init__.py,sha256=HlENK1XfnwnYMrlkvVJTjs-6W7RLLUJOoyFcOb7njUE,34
6
+ archae/util/archiver/__init__.py,sha256=baayr_wh2MfRaKKiVuaUxtYJcMzKR82Uf1amcU7QtzM,202
7
+ archae/util/archiver/base_archiver.py,sha256=_2-ARZLxPqSeE_0TF7NTVK8TwWhLyP9fA2G0fnMb0zc,1549
8
+ archae/util/archiver/peazip.py,sha256=XdhI2EQ3A9EH89KxPv-FcmjukwWcS96WtG-gwkha6hs,4022
9
+ archae/util/archiver/seven_zip.py,sha256=eck_cXMGFqwJ05ZKge3N0Ozy1XkKuGyE09s3jFcsMrQ,4951
10
+ archae/util/archiver/unar.py,sha256=xWTR7cpFXyofvtPI-7Z5Ski605WmJdLoAm6a81IriAc,3974
11
+ archae/util/enum/__init__.py,sha256=IvjtVopATKLAHDjOpblaExy2yXwIzweX0HoUrQWcpkM,109
12
+ archae/util/enum/byte_scale.py,sha256=uCTTaT0hiDKdVwJ1l6i35_Iv2VVpHmnW9i0gfZ5FB1I,1204
13
+ archae-2026.1.0.dist-info/WHEEL,sha256=fAguSjoiATBe7TNBkJwOjyL1Tt4wwiaQGtNtjRPNMQA,80
14
+ archae-2026.1.0.dist-info/entry_points.txt,sha256=gGL_R78QELaTeyFGb-OuSnRuu4EUdT68EKmyrFno59o,48
15
+ archae-2026.1.0.dist-info/METADATA,sha256=UJAYwbNwo-bwsddem7zknCuJ4Q0xbFinjfgpqA2CEt0,7457
16
+ archae-2026.1.0.dist-info/RECORD,,
@@ -0,0 +1,4 @@
1
+ Wheel-Version: 1.0
2
+ Generator: uv 0.9.28
3
+ Root-Is-Purelib: true
4
+ Tag: py3-none-any
@@ -0,0 +1,3 @@
1
+ [console_scripts]
2
+ archae = archae.__main__:cli
3
+