archae 2026.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,93 @@
1
+ """File tracking utilities for archae."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import copy
6
+ from typing import Any
7
+
8
+
9
class FileTracker:
    """Manages file tracking by hash with metadata and paths.

    Every tracked hash maps to a record dict holding ``"size"`` (bytes) and
    ``"metadata"`` (free-form key/value pairs). A ``"paths"`` list is added to
    the record the first time a path is registered for that hash.
    """

    def __init__(self) -> None:
        """Initialize the FileTracker."""
        self.tracked_files: dict[str, dict] = {}

    def _require_record(self, file_hash: str) -> dict:
        """Return the live record for a tracked hash.

        Args:
            file_hash (str): The hash of the file.

        Returns:
            dict: The internal (mutable) record for the hash.

        Raises:
            KeyError: If the hash has not been registered via ``track_file``.
        """
        record = self.tracked_files.get(file_hash)
        if record is None:
            # Same exception type a plain dict lookup would raise, but with a
            # message that tells the caller what went wrong.
            msg = f"File hash {file_hash} is not tracked; call track_file() first."
            raise KeyError(msg)
        return record

    def track_file(self, file_hash: str, file_size_bytes: int) -> None:
        """Track a file by its hash.

        Tracking an already-known hash with the same size is a no-op.

        Args:
            file_hash (str): The hash of the file to track.
            file_size_bytes (int): The size of the file in bytes.

        Raises:
            RuntimeError: If the hash is already tracked with a different size.
        """
        record = self.tracked_files.get(file_hash)
        if record is None:
            self.tracked_files[file_hash] = {"size": file_size_bytes, "metadata": {}}
        elif record["size"] != file_size_bytes:
            msg = f"Hash collision detected for hash {file_hash} with differing sizes."
            raise RuntimeError(msg)

    def is_file_tracked(self, file_hash: str) -> bool:
        """Check if a file is tracked by its hash.

        Args:
            file_hash (str): The hash of the file to check.

        Returns:
            bool: True if the file is tracked, False otherwise.
        """
        return file_hash in self.tracked_files

    def get_tracked_file_metadata(self, file_hash: str) -> dict:
        """Get metadata for a tracked file by its hash.

        Args:
            file_hash (str): The hash of the file.

        Returns:
            dict: A deep copy of the file's metadata, or an empty dict when the
                hash is not tracked.
        """
        return copy.deepcopy(self.tracked_files.get(file_hash, {}).get("metadata", {}))

    def track_file_path(self, file_hash: str, file_path: Any) -> None:
        """Track a file path by its hash.

        A path that is already recorded for the hash is not added again.

        Args:
            file_hash (str): The hash of the file.
            file_path: The path to track.

        Raises:
            KeyError: If the hash has not been registered via ``track_file``.
        """
        paths = self._require_record(file_hash).setdefault("paths", [])
        # Membership test on a list (not a set) because file_path may be
        # unhashable and insertion order is part of the stored data.
        if file_path not in paths:
            paths.append(file_path)

    def add_metadata_to_hash(self, file_hash: str, key: str, value: Any) -> None:
        """Add metadata to a tracked file.

        An existing value stored under the same key is overwritten.

        Args:
            file_hash (str): The hash of the file.
            key (str): The metadata key.
            value (Any): The metadata value.

        Raises:
            KeyError: If the hash has not been registered via ``track_file``.
        """
        self._require_record(file_hash)["metadata"][key] = value

    def get_tracked_file_size(self) -> int:
        """Get the total size of all tracked files.

        Returns:
            int: The total size in bytes.
        """
        return sum(record.get("size", 0) for record in self.tracked_files.values())

    def get_tracked_files(self) -> dict[str, dict]:
        """Get all tracked files. This is a deep copy to prevent external modification.

        Returns:
            dict[str, dict]: The tracked files dictionary.
        """
        return copy.deepcopy(self.tracked_files)
@@ -0,0 +1,112 @@
1
+ """Tool manager for locating and managing external archiving tools."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import logging
6
+ import shutil
7
+ from typing import TYPE_CHECKING, ClassVar, cast
8
+
9
+ import archae.util.archiver
10
+
11
+ if TYPE_CHECKING:
12
+ from archae.util.archiver.base_archiver import BaseArchiver
13
+
14
+ logger = logging.getLogger("archae")
15
+
16
+
17
class ToolManager:
    """Class-level registry of archiver tools whose executables were found on the PATH."""

    # Archiver name -> archiver instance bound to the resolved executable path.
    __tools: ClassVar[dict[str, BaseArchiver]] = {}

    @classmethod
    def locate_tools(cls) -> None:
        """Look up the executable of every direct BaseArchiver subclass and register the hits.

        Archivers whose executable cannot be resolved with ``shutil.which`` are
        reported with a warning and left out of the registry.
        """
        for candidate in archae.util.archiver.BaseArchiver.__subclasses__():
            logger.debug("Locating tool for %s", candidate.archiver_name)
            executable = shutil.which(str(candidate.executable_name))
            if not executable:
                logger.warning(
                    "MISSING_ARCHIVER: Could not find %s; some archive types may not be supported",
                    candidate.archiver_name,
                )
                continue
            logger.debug("Found %s at %s", candidate.archiver_name, executable)
            cls.__tools[str(candidate.archiver_name)] = candidate(executable)  # type: ignore[abstract]

    @classmethod
    def get_supported_extensions(cls) -> list[str]:
        """Collect the file extensions handled by the located tools.

        Returns:
            list[str]: Sorted, de-duplicated file extensions.
        """
        available = {
            extension
            for tool in cls.__tools.values()
            for extension in tool.file_extensions
        }
        return sorted(available)

    @classmethod
    def get_unsupported_extensions(cls) -> list[str]:
        """Collect the file extensions declared by archiver classes but not covered by a located tool.

        Returns:
            list[str]: Sorted file extensions without a located tool.
        """
        # Everything any direct archiver subclass declares it can handle.
        declared = {
            extension
            for archiver_cls in archae.util.archiver.BaseArchiver.__subclasses__()
            for extension in cast("list[str]", archiver_cls.file_extensions)
        }
        # What the located tools actually cover.
        available = {
            extension
            for tool in cls.__tools.values()
            for extension in tool.file_extensions
        }
        return sorted(declared - available)

    @classmethod
    def get_supported_mime_types(cls) -> list[str]:
        """Collect the MIME types handled by the located tools.

        Returns:
            list[str]: Sorted, de-duplicated MIME types.
        """
        available = {
            mime_type
            for tool in cls.__tools.values()
            for mime_type in tool.mime_types
        }
        return sorted(available)

    @classmethod
    def get_unsupported_mime_types(cls) -> list[str]:
        """Collect the MIME types declared by archiver classes but not covered by a located tool.

        Returns:
            list[str]: Sorted MIME types without a located tool.
        """
        # Everything any direct archiver subclass declares it can handle.
        declared = {
            mime_type
            for archiver_cls in archae.util.archiver.BaseArchiver.__subclasses__()
            for mime_type in cast("list[str]", archiver_cls.mime_types)
        }
        # What the located tools actually cover.
        available = {
            mime_type
            for tool in cls.__tools.values()
            for mime_type in tool.mime_types
        }
        return sorted(declared - available)

    @classmethod
    def get_tools(cls) -> dict[str, BaseArchiver]:
        """Return the located tools without exposing the internal registry.

        Returns:
            dict[str, BaseArchiver]: A shallow copy of the tools dictionary.
        """
        return dict(cls.__tools)
@@ -0,0 +1,161 @@
1
+ Metadata-Version: 2.3
2
+ Name: archae
3
+ Version: 2026.2.0
4
+ Summary: Archae explodes archives.
5
+ Keywords:
6
+ Author: Shawn McNaughton
7
+ Author-email: Shawn McNaughton <shawngmc@gmail.com>
8
+ License: MIT License
9
+
10
+ Copyright © 2026 Shawn McNaughton
11
+
12
+ Permission is hereby granted, free of charge, to any person obtaining a copy
13
+ of this software and associated documentation files (the "Software"), to deal
14
+ in the Software without restriction, including without limitation the rights
15
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
16
+ copies of the Software, and to permit persons to whom the Software is
17
+ furnished to do so, subject to the following conditions:
18
+
19
+ The above copyright notice and this permission notice shall be included in all
20
+ copies or substantial portions of the Software.
21
+
22
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
23
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
24
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
25
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
26
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
27
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
28
+ SOFTWARE.
29
+ Classifier: Development Status :: 2 - Pre-Alpha
30
+ Classifier: Environment :: Console
31
+ Classifier: Intended Audience :: Developers
32
+ Classifier: License :: OSI Approved :: MIT License
33
+ Classifier: Programming Language :: Python
34
+ Classifier: Programming Language :: Python :: 3
35
+ Classifier: Programming Language :: Python :: 3.12
36
+ Classifier: Programming Language :: Python :: 3.13
37
+ Classifier: Programming Language :: Python :: 3.14
38
+ Classifier: Programming Language :: Python :: 3 :: Only
39
+ Classifier: Typing :: Typed
40
+ Requires-Dist: click
41
+ Requires-Dist: dynaconf>=3.2.12
42
+ Requires-Dist: platformdirs>=4.5.1
43
+ Requires-Dist: python-magic
44
+ Requires-Dist: pyyaml>=6.0.3
45
+ Requires-Dist: rich-click
46
+ Requires-Dist: types-pyyaml>=6
47
+ Requires-Python: >=3.12
48
+ Project-URL: Documentation, https://archae.readthedocs.io/en/stable/
49
+ Project-URL: Changelog, https://archae.readthedocs.io/en/stable/changelog.html
50
+ Project-URL: Source Code, https://github.com/shawngmc/archae
51
+ Project-URL: Issue Tracker, https://github.com/shawngmc/archae/issues
52
+ Description-Content-Type: text/markdown
53
+
54
+ <!-- start docs-include-index -->
55
+
56
+ # Archae
57
+
58
+ ![Archae Logo of a spider exploring a sarcophagus](./_static/archae_logo.png)
59
+
60
+ [![PyPI](https://img.shields.io/pypi/v/archae)](https://img.shields.io/pypi/v/archae)
61
+ [![Supported Python Versions](https://img.shields.io/pypi/pyversions/archae)](https://pypi.org/project/archae/)
62
+ [![pre-commit.ci status](https://results.pre-commit.ci/badge/github/shawngmc/archae/main.svg)](https://results.pre-commit.ci/latest/github/shawngmc/archae/main)
63
+ [![Test](https://github.com/shawngmc/archae/actions/workflows/test.yml/badge.svg)](https://github.com/shawngmc/archae/actions/workflows/test.yml)
64
+ [![Documentation Status](https://readthedocs.org/projects/archae/badge/?version=latest)](https://archae.readthedocs.io/en/latest/?badge=latest)
65
+ [![PyPI - License](https://img.shields.io/pypi/l/archae)](https://img.shields.io/pypi/l/archae)
66
+
67
+ Archae explodes archives.
68
+
69
+ <!-- end docs-include-index -->
70
+
71
+ ## Why
72
+
73
+ Every once in a while, I run into an issue: multiple layers of archives. The reasons vary, but examples would include:
74
+
75
+ - Searching for something in a ZIP of folders that contained a ZIP with a CD image in it
76
+ - Running a malware scan and finding an obscure archive format was missed and not even flagged
77
+ Meanwhile, I want to make sure I don't fill my disk, especially if an archive bomb (more commonly known as a ZIP bomb) has been jammed in somewhere. They're only funny the first time. :D
78
+
79
+ ## Features
80
+
81
+ - Uses 7z/peazip/unar (not 7za/7zr) to try to extract archives
82
+ - No substantial limit on the number of archive layers
83
+ - Identifies file types via libmagic
84
+ - Detects duplicate archives
85
+ - Basic archive bomb protections
86
+ - MAX_ARCHIVE_SIZE_BYTES - ensures the uncompressed size of an archive is limited
87
+ - MAX_TOTAL_SIZE_BYTES - ensures the total extracted footprint isn't above a certain size
88
+ - MIN_ARCHIVE_RATIO - ensures very-high-compression-ratio archives are stopped
89
+ - MIN_DISK_FREE_SPACE - minimum free space at the extraction location
90
+ - MAX_DEPTH - allow setting a maximum archive depth to traverse
91
+
92
+ ## Installation
93
+
94
+ <!-- start docs-include-installation -->
95
+
96
+ Archae is available on [PyPI](https://pypi.org/project/archae/). Install with [uv](https://docs.astral.sh/uv/) or your package manager of choice:
97
+
98
+ ```sh
99
+ uv tool install archae
100
+ ```
101
+
102
+ <!-- end docs-include-installation -->
103
+
104
+ ## Documentation
105
+
106
+ Check out the [Archae documentation](https://archae.readthedocs.io/en/stable/) for the [User's Guide](https://archae.readthedocs.io/en/stable/usage.html) and [CLI Reference](https://archae.readthedocs.io/en/stable/cli.html).
107
+
108
+ ## Usage
109
+
110
+ Configuration values are supplied in one of four ways, and any item lower in this list will overwrite a prior one:
111
+
112
+ - Default values are stored in the app
113
+ - A TOML file at ~/.config/archae/ will be created on first run and can override those values (ex. MIN_ARCHIVE_RATIO = 0.005)
114
+ - Env vars starting with "ARCHAE\_" are parsed (ex. ARCHAE_MIN_ARCHIVE_RATIO=0.005)
115
+ - Values can be passed in as flags (ex. --min_archive_ratio=0.005)
116
+
117
+ <!-- start docs-include-usage -->
118
+
119
+ Running `archae --help` or `python -m archae --help` shows a list of all of the available options and arguments:
120
+
121
+ <!-- [[[cog
122
+ import cog
123
+ from archae import cli
124
+ from click.testing import CliRunner
125
+ runner = CliRunner()
126
+ result = runner.invoke(cli.cli, ["--help"], terminal_width=88)
127
+ help = result.output.replace("Usage: cli", "Usage: archae")
128
+ cog.outl(f"\n```sh\narchae --help\n{help.rstrip()}\n```\n")
129
+ ]]] -->
130
+
131
+ ```sh
132
+ archae --help
133
+
134
+ Usage: archae [OPTIONS] COMMAND [ARGS]...
135
+
136
+ Archae explodes archives.
137
+
138
+ ╭─ Options ────────────────────────────────────────────────────────────────────────────╮
139
+ │ --version -v Show the version and exit. │
140
+ │ --help -h Show this message and exit. │
141
+ ╰──────────────────────────────────────────────────────────────────────────────────────╯
142
+ ╭─ Commands ───────────────────────────────────────────────────────────────────────────╮
143
+ │ extract Extract and analyze an archive. │
144
+ │ listopts List all available configuration options. │
145
+ │ status Show archae status and available tools. │
146
+ ╰──────────────────────────────────────────────────────────────────────────────────────╯
147
+ ```
148
+
149
+ <!-- [[[end]]] -->
150
+
151
+ <!-- end docs-include-usage -->
152
+
153
+ ## TODOs
154
+
155
+ - More archive bomb protections
156
+ - delete_archives_as_exploded - remove archive files to reduce duplication (boolean)
157
+ - Improve archive type detection
158
+ - Separate between extractable and non-extractable archive types
159
+ - Detect password-protected archives
160
+ - Allow supplying archive passwords by hash
161
+ - Add custom magic to detect obscure archive formats
@@ -0,0 +1,23 @@
1
+ archae/__init__.py,sha256=O_HNvpNVsJ5LZPNPLvuHhHdOq0b6M0LnWqycB505mkQ,111
2
+ archae/__main__.py,sha256=l3eO5dEs1cR_hDziOpnW1PYzoRL2siYm81wgvftyigg,172
3
+ archae/cli.py,sha256=ZYq7joe10y73dCwQY7666m-SY2dej_GVEmoYRyFpg7A,5739
4
+ archae/config.py,sha256=zGOwkPDfNLR-06ziZMXeLBamd-98AbNU0aTuoH7ibKQ,2952
5
+ archae/default_settings.toml,sha256=svBdN9QJm8UBBz6AEWpBp-gslSBcE5n00xZdl32KDEc,230
6
+ archae/extractor.py,sha256=CfYtsxuoXQ2rOLQIByJvLI9Y61U338dZoeb7ZBWAc_g,9948
7
+ archae/options.yaml,sha256=nJo7gOCuiS_fKd9C3iRwI3Eb_f4OT6mbdVkipqdISpk,937
8
+ archae/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
9
+ archae/util/__init__.py,sha256=HlENK1XfnwnYMrlkvVJTjs-6W7RLLUJOoyFcOb7njUE,34
10
+ archae/util/archiver/__init__.py,sha256=baayr_wh2MfRaKKiVuaUxtYJcMzKR82Uf1amcU7QtzM,202
11
+ archae/util/archiver/base_archiver.py,sha256=xEHynzmhwZdMl2fEDj9h-YaH9ZQEeaugF4H_ILoOxTY,1629
12
+ archae/util/archiver/peazip.py,sha256=XdhI2EQ3A9EH89KxPv-FcmjukwWcS96WtG-gwkha6hs,4022
13
+ archae/util/archiver/seven_zip.py,sha256=eck_cXMGFqwJ05ZKge3N0Ozy1XkKuGyE09s3jFcsMrQ,4951
14
+ archae/util/archiver/unar.py,sha256=xWTR7cpFXyofvtPI-7Z5Ski605WmJdLoAm6a81IriAc,3974
15
+ archae/util/converter/file_size.py,sha256=BdcYzpdvCDq3YH1eYW1-zwnQCxv9mBzG4DuqHY2dt6o,1984
16
+ archae/util/enum/__init__.py,sha256=IvjtVopATKLAHDjOpblaExy2yXwIzweX0HoUrQWcpkM,109
17
+ archae/util/enum/byte_scale.py,sha256=5TZG1msPmJU9whZtKAywZtKOre6p2xMJ2y0gE4TG3OE,1593
18
+ archae/util/file_tracker.py,sha256=HQb1l7j_Jy0qO6tqkB8jXM6P2o5fLB6Ih0J5U4JUyT8,3106
19
+ archae/util/tool_manager.py,sha256=HU2xkmb_18XF5SFwFV5gUUcPddZVBYRdGB2PSWgWaqA,3974
20
+ archae-2026.2.0.dist-info/WHEEL,sha256=fAguSjoiATBe7TNBkJwOjyL1Tt4wwiaQGtNtjRPNMQA,80
21
+ archae-2026.2.0.dist-info/entry_points.txt,sha256=gGL_R78QELaTeyFGb-OuSnRuu4EUdT68EKmyrFno59o,48
22
+ archae-2026.2.0.dist-info/METADATA,sha256=C-zA6mJjkd8Ii6vBsVAsEvc8VA20acPGqa3A38950As,7974
23
+ archae-2026.2.0.dist-info/RECORD,,
@@ -0,0 +1,4 @@
1
+ Wheel-Version: 1.0
2
+ Generator: uv 0.9.28
3
+ Root-Is-Purelib: true
4
+ Tag: py3-none-any
@@ -0,0 +1,3 @@
1
+ [console_scripts]
2
+ archae = archae.__main__:cli
3
+