archae 2026.1.0__tar.gz → 2026.1.0b2__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {archae-2026.1.0 → archae-2026.1.0b2}/PKG-INFO +25 -10
- {archae-2026.1.0 → archae-2026.1.0b2}/README.md +24 -9
- {archae-2026.1.0 → archae-2026.1.0b2}/pyproject.toml +1 -1
- {archae-2026.1.0 → archae-2026.1.0b2}/src/archae/cli.py +21 -14
- {archae-2026.1.0 → archae-2026.1.0b2}/src/archae/util/enum/byte_scale.py +9 -0
- {archae-2026.1.0 → archae-2026.1.0b2}/LICENSE +0 -0
- {archae-2026.1.0 → archae-2026.1.0b2}/src/archae/__init__.py +0 -0
- {archae-2026.1.0 → archae-2026.1.0b2}/src/archae/__main__.py +0 -0
- {archae-2026.1.0 → archae-2026.1.0b2}/src/archae/py.typed +0 -0
- {archae-2026.1.0 → archae-2026.1.0b2}/src/archae/util/__init__.py +0 -0
- {archae-2026.1.0 → archae-2026.1.0b2}/src/archae/util/archiver/__init__.py +0 -0
- {archae-2026.1.0 → archae-2026.1.0b2}/src/archae/util/archiver/base_archiver.py +0 -0
- {archae-2026.1.0 → archae-2026.1.0b2}/src/archae/util/archiver/peazip.py +0 -0
- {archae-2026.1.0 → archae-2026.1.0b2}/src/archae/util/archiver/seven_zip.py +0 -0
- {archae-2026.1.0 → archae-2026.1.0b2}/src/archae/util/archiver/unar.py +0 -0
- {archae-2026.1.0 → archae-2026.1.0b2}/src/archae/util/enum/__init__.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.3
|
|
2
2
|
Name: archae
|
|
3
|
-
Version: 2026.1.0
|
|
3
|
+
Version: 2026.1.0b2
|
|
4
4
|
Summary: Archae explodes archives.
|
|
5
5
|
Keywords:
|
|
6
6
|
Author: Shawn McNaughton
|
|
@@ -65,6 +65,25 @@ Archae explodes archives.
|
|
|
65
65
|
|
|
66
66
|
<!-- end docs-include-index -->
|
|
67
67
|
|
|
68
|
+
## Why
|
|
69
|
+
|
|
70
|
+
Every once in a while, I run into an issue: multiple layers of archives. The reasons vary, but examples would include:
|
|
71
|
+
|
|
72
|
+
- Searching for something in a ZIP of folders that contained a ZIP with a CD image in it
|
|
73
|
+
- Running a malware scan and finding an obscure archive format was missed and not even flagged
|
|
74
|
+
Meanwhile, I want to make sure I don't fill my disk, especially if an archive bomb (more commonly known as a ZIP bomb) has been jammed in somewhere. They're only funny the first time. :D
|
|
75
|
+
|
|
76
|
+
## Features
|
|
77
|
+
|
|
78
|
+
- Uses 7z/peazip/unar (not 7za/7zr) to try to extract archives
|
|
79
|
+
- No substantial limit on the number of archive layers
|
|
80
|
+
- Identifies file types via libmagic
|
|
81
|
+
- Detects duplicate archives
|
|
82
|
+
- Basic archive bomb protections
|
|
83
|
+
- max_archive_size_bytes - ensures the uncompressed size of an archive is limited
|
|
84
|
+
- max_total_size_bytes - ensures the total extracted footprint isn't above a certain size
|
|
85
|
+
- min_archive_ratio - ensures very-high-compression-ratio archives are stopped
|
|
86
|
+
|
|
68
87
|
## Installation
|
|
69
88
|
|
|
70
89
|
<!-- start docs-include-installation -->
|
|
@@ -129,19 +148,15 @@ archae --help
|
|
|
129
148
|
|
|
130
149
|
<!-- end docs-include-usage -->
|
|
131
150
|
|
|
132
|
-
## Features
|
|
133
|
-
|
|
134
|
-
- Uses 7z/peazip/unar (not 7za/7zr) to try to extract archives
|
|
135
|
-
- No substantial limit on the number of archive layers
|
|
136
|
-
- Identifies file types via libmagic
|
|
137
|
-
- Basic archive bomb protections
|
|
138
|
-
|
|
139
151
|
## TODOs
|
|
140
152
|
|
|
141
|
-
-
|
|
153
|
+
- More archive bomb protections
|
|
154
|
+
- max_total_size_bytes - (NYI) ensures the total extracted footprint isn't above a certain size
|
|
155
|
+
- min_free_space - minimum free space at the extraction location
|
|
156
|
+
- delete_archives_as_exploded - remove archive files to reduce duplication (boolean)
|
|
157
|
+
- max_archive_depth - allow setting a maximum archive depth
|
|
142
158
|
- Improve archive type detection
|
|
143
159
|
- Separate between extractable and non-extractable archive types
|
|
144
|
-
- Protect against/detect depth attacks (excessively nested archives)
|
|
145
160
|
- Detect password-protected archives
|
|
146
161
|
- Allow supplying archive passwords by hash
|
|
147
162
|
- Add custom magic to detect obscure archive formats
|
|
@@ -15,6 +15,25 @@ Archae explodes archives.
|
|
|
15
15
|
|
|
16
16
|
<!-- end docs-include-index -->
|
|
17
17
|
|
|
18
|
+
## Why
|
|
19
|
+
|
|
20
|
+
Every once in a while, I run into an issue: multiple layers of archives. The reasons vary, but examples would include:
|
|
21
|
+
|
|
22
|
+
- Searching for something in a ZIP of folders that contained a ZIP with a CD image in it
|
|
23
|
+
- Running a malware scan and finding an obscure archive format was missed and not even flagged
|
|
24
|
+
Meanwhile, I want to make sure I don't fill my disk, especially if an archive bomb (more commonly known as a ZIP bomb) has been jammed in somewhere. They're only funny the first time. :D
|
|
25
|
+
|
|
26
|
+
## Features
|
|
27
|
+
|
|
28
|
+
- Uses 7z/peazip/unar (not 7za/7zr) to try to extract archives
|
|
29
|
+
- No substantial limit on the number of archive layers
|
|
30
|
+
- Identifies file types via libmagic
|
|
31
|
+
- Detects duplicate archives
|
|
32
|
+
- Basic archive bomb protections
|
|
33
|
+
- max_archive_size_bytes - ensures the uncompressed size of an archive is limited
|
|
34
|
+
- max_total_size_bytes - ensures the total extracted footprint isn't above a certain size
|
|
35
|
+
- min_archive_ratio - ensures very-high-compression-ratio archives are stopped
|
|
36
|
+
|
|
18
37
|
## Installation
|
|
19
38
|
|
|
20
39
|
<!-- start docs-include-installation -->
|
|
@@ -79,19 +98,15 @@ archae --help
|
|
|
79
98
|
|
|
80
99
|
<!-- end docs-include-usage -->
|
|
81
100
|
|
|
82
|
-
## Features
|
|
83
|
-
|
|
84
|
-
- Uses 7z/peazip/unar (not 7za/7zr) to try to extract archives
|
|
85
|
-
- No substantial limit on the number of archive layers
|
|
86
|
-
- Identifies file types via libmagic
|
|
87
|
-
- Basic archive bomb protections
|
|
88
|
-
|
|
89
101
|
## TODOs
|
|
90
102
|
|
|
91
|
-
-
|
|
103
|
+
- More archive bomb protections
|
|
104
|
+
- max_total_size_bytes - (NYI) ensures the total extracted footprint isn't above a certain size
|
|
105
|
+
- min_free_space - minimum free space at the extraction location
|
|
106
|
+
- delete_archives_as_exploded - remove archive files to reduce duplication (boolean)
|
|
107
|
+
- max_archive_depth - allow setting a maximum archive depth
|
|
92
108
|
- Improve archive type detection
|
|
93
109
|
- Separate between extractable and non-extractable archive types
|
|
94
|
-
- Protect against/detect depth attacks (excessively nested archives)
|
|
95
110
|
- Detect password-protected archives
|
|
96
111
|
- Allow supplying archive passwords by hash
|
|
97
112
|
- Add custom magic to detect obscure archive formats
|
|
@@ -64,28 +64,19 @@ class FileSizeParamType(click.ParamType):
|
|
|
64
64
|
pass
|
|
65
65
|
|
|
66
66
|
# Regex to split number and unit
|
|
67
|
-
match = re.match(r"^(\d+(?:\.\d+)?)\s*([
|
|
67
|
+
match = re.match(r"^(\d+(?:\.\d+)?)\s*([KMGTP]B?)$", str(value), re.IGNORECASE)
|
|
68
68
|
if not match:
|
|
69
69
|
msg = f"{value} is not a valid file size (e.g., 10G, 500M)"
|
|
70
70
|
raise ValueError(msg)
|
|
71
71
|
|
|
72
72
|
number, unit = match.groups()
|
|
73
73
|
number = float(number)
|
|
74
|
-
unit = unit.upper()
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
"K": 1024,
|
|
78
|
-
"KB": 1024,
|
|
79
|
-
"M": 1024**2,
|
|
80
|
-
"MB": 1024**2,
|
|
81
|
-
"G": 1024**3,
|
|
82
|
-
"GB": 1024**3,
|
|
83
|
-
"T": 1024**4,
|
|
84
|
-
"TB": 1024**4,
|
|
85
|
-
}
|
|
74
|
+
unit = unit[0].upper()
|
|
75
|
+
|
|
76
|
+
byte_scale = 1024 ** (ByteScale.from_prefix_letter(unit).value)
|
|
86
77
|
|
|
87
78
|
# Default to bytes if no specific unit multiplier, or assume B
|
|
88
|
-
return int(number *
|
|
79
|
+
return int(number * byte_scale)
|
|
89
80
|
|
|
90
81
|
def convert(self, value: click.Option, param: str, ctx: click.Context) -> int:
|
|
91
82
|
"""Convert a FileSizeParam to an int.
|
|
@@ -211,6 +202,13 @@ def handle_file(file_path: Path) -> None:
|
|
|
211
202
|
click.echo(
|
|
212
203
|
f"Skipped archive {file_path} because expected size {extracted_size} is greater than max_archive_size_bytes {config['max_archive_size_bytes']}"
|
|
213
204
|
)
|
|
205
|
+
elif (
|
|
206
|
+
get_tracked_file_size() + extracted_size
|
|
207
|
+
> config["max_total_size_bytes"]
|
|
208
|
+
):
|
|
209
|
+
click.echo(
|
|
210
|
+
f"Skipped archive {file_path} because expected size {extracted_size} + current tracked files {get_tracked_file_size()} is greater than max_total_size_bytes {config['max_total_size_bytes']}"
|
|
211
|
+
)
|
|
214
212
|
elif compression_ratio < config["min_archive_ratio"]:
|
|
215
213
|
click.echo(
|
|
216
214
|
f"Skipped archive {file_path} because compression ratio {compression_ratio:.5f} is less than min_archive_ratio {config['min_archive_ratio']}"
|
|
@@ -372,3 +370,12 @@ def add_metadata_to_hash(hash: str, key: str, value: Any) -> None:
|
|
|
372
370
|
value (Any): The metadata value.
|
|
373
371
|
"""
|
|
374
372
|
tracked_files[hash]["metadata"][key] = value
|
|
373
|
+
|
|
374
|
+
|
|
375
|
+
def get_tracked_file_size() -> int:
|
|
376
|
+
"""Get the total size of all tracked files.
|
|
377
|
+
|
|
378
|
+
Returns:
|
|
379
|
+
int: The total size in bytes.
|
|
380
|
+
"""
|
|
381
|
+
return sum(tracked_files[hash].get("size", 0) for hash in tracked_files)
|
|
@@ -44,3 +44,12 @@ class ByteScale(Enum):
|
|
|
44
44
|
def prefix_letter(self, value: str) -> None:
|
|
45
45
|
"""Setter for prefix letter."""
|
|
46
46
|
self._prefix_letter = value
|
|
47
|
+
|
|
48
|
+
@staticmethod
|
|
49
|
+
def from_prefix_letter(prefix_letter: str) -> Self: # type: ignore[misc]
|
|
50
|
+
"""Static method to look up from a prefix_letter."""
|
|
51
|
+
for member in ByteScale:
|
|
52
|
+
if member.prefix_letter == prefix_letter.upper():
|
|
53
|
+
return member
|
|
54
|
+
msg = f"'{prefix_letter}' is not a valid byte scale prefix letter."
|
|
55
|
+
raise ValueError(msg)
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|