archae 2026.1.0__tar.gz → 2026.1.0b2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: archae
3
- Version: 2026.1.0
3
+ Version: 2026.1.0b2
4
4
  Summary: Archae explodes archives.
5
5
  Keywords:
6
6
  Author: Shawn McNaughton
@@ -65,6 +65,25 @@ Archae explodes archives.
65
65
 
66
66
  <!-- end docs-include-index -->
67
67
 
68
+ ## Why
69
+
70
+ Every once in a while, I run into an issue: multiple layers of archives. The reasons vary, but examples would include:
71
+
72
+ - Searching for something in a ZIP of folders that contained a ZIP with a CD image in it
73
+ - Running a malware scan and finding an obscure archive format was missed and not even flagged
74
+ Meanwhile, I want to make sure I don't fill my disk, especially if an archive bomb (more commonly known as a ZIP bomb) has been jammed in somewhere. They're only funny the first time. :D
75
+
76
+ ## Features
77
+
78
+ - Uses 7z/peazip/unar (not 7za/7zr) to try to extract archives
79
+ - No substantial limit on the number of archive layers
80
+ - Identifies file types via libmagic
81
+ - Detects duplicate archives
82
+ - Basic archive bomb protections
83
+ - max_archive_size_bytes - ensures the uncompressed size of an archive is limited
84
+ - max_total_size_bytes - ensures the total extracted footprint isn't above a certain size
85
+ - min_archive_ratio - ensures very-high-compression-ratio archives are stopped
86
+
68
87
  ## Installation
69
88
 
70
89
  <!-- start docs-include-installation -->
@@ -129,19 +148,15 @@ archae --help
129
148
 
130
149
  <!-- end docs-include-usage -->
131
150
 
132
- ## Features
133
-
134
- - Uses 7z/peazip/unar (not 7za/7zr) to try to extract archives
135
- - No substantial limit on the number of archive layers
136
- - Identifies file types via libmagic
137
- - Basic archive bomb protections
138
-
139
151
  ## TODOs
140
152
 
141
- - Protect against/detect archive bombs (malicious disk usage)
153
+ - More archive bomb protections
154
+ - max_total_size_bytes - (NYI) ensures the total extracted footprint isn't above a certain size
155
+ - min_free_space - minimum free space at the extraction location
156
+ - delete_archives_as_exploded - remove archive files to reduce duplication (boolean)
157
+ - max_archive_depth - allow setting a maximum archive depth
142
158
  - Improve archive type detection
143
159
  - Separate between extractable and non-extractable archive types
144
- - Protect against/detect depth attacks (excessively nested archives)
145
160
  - Detect password-protected archives
146
161
  - Allow supplying archive passwords by hash
147
162
  - Add custom magic to detect obscure archive formats
@@ -15,6 +15,25 @@ Archae explodes archives.
15
15
 
16
16
  <!-- end docs-include-index -->
17
17
 
18
+ ## Why
19
+
20
+ Every once in a while, I run into an issue: multiple layers of archives. The reasons vary, but examples would include:
21
+
22
+ - Searching for something in a ZIP of folders that contained a ZIP with a CD image in it
23
+ - Running a malware scan and finding an obscure archive format was missed and not even flagged
24
+ Meanwhile, I want to make sure I don't fill my disk, especially if an archive bomb (more commonly known as a ZIP bomb) has been jammed in somewhere. They're only funny the first time. :D
25
+
26
+ ## Features
27
+
28
+ - Uses 7z/peazip/unar (not 7za/7zr) to try to extract archives
29
+ - No substantial limit on the number of archive layers
30
+ - Identifies file types via libmagic
31
+ - Detects duplicate archives
32
+ - Basic archive bomb protections
33
+ - max_archive_size_bytes - ensures the uncompressed size of an archive is limited
34
+ - max_total_size_bytes - ensures the total extracted footprint isn't above a certain size
35
+ - min_archive_ratio - ensures very-high-compression-ratio archives are stopped
36
+
18
37
  ## Installation
19
38
 
20
39
  <!-- start docs-include-installation -->
@@ -79,19 +98,15 @@ archae --help
79
98
 
80
99
  <!-- end docs-include-usage -->
81
100
 
82
- ## Features
83
-
84
- - Uses 7z/peazip/unar (not 7za/7zr) to try to extract archives
85
- - No substantial limit on the number of archive layers
86
- - Identifies file types via libmagic
87
- - Basic archive bomb protections
88
-
89
101
  ## TODOs
90
102
 
91
- - Protect against/detect archive bombs (malicious disk usage)
103
+ - More archive bomb protections
104
+ - max_total_size_bytes - (NYI) ensures the total extracted footprint isn't above a certain size
105
+ - min_free_space - minimum free space at the extraction location
106
+ - delete_archives_as_exploded - remove archive files to reduce duplication (boolean)
107
+ - max_archive_depth - allow setting a maximum archive depth
92
108
  - Improve archive type detection
93
109
  - Separate between extractable and non-extractable archive types
94
- - Protect against/detect depth attacks (excessively nested archives)
95
110
  - Detect password-protected archives
96
111
  - Allow supplying archive passwords by hash
97
112
  - Add custom magic to detect obscure archive formats
@@ -4,7 +4,7 @@ build-backend = "uv_build"
4
4
 
5
5
  [project]
6
6
  name = "archae"
7
- version = "2026.1.0"
7
+ version = "2026.1.0b2"
8
8
  description = "Archae explodes archives."
9
9
  authors = [{name = "Shawn McNaughton", email = "shawngmc@gmail.com"}]
10
10
  readme = "README.md"
@@ -64,28 +64,19 @@ class FileSizeParamType(click.ParamType):
64
64
  pass
65
65
 
66
66
  # Regex to split number and unit
67
- match = re.match(r"^(\d+(?:\.\d+)?)\s*([KMGT]B?)$", str(value), re.IGNORECASE)
67
+ match = re.match(r"^(\d+(?:\.\d+)?)\s*([KMGTP]B?)$", str(value), re.IGNORECASE)
68
68
  if not match:
69
69
  msg = f"{value} is not a valid file size (e.g., 10G, 500M)"
70
70
  raise ValueError(msg)
71
71
 
72
72
  number, unit = match.groups()
73
73
  number = float(number)
74
- unit = unit.upper()
75
-
76
- units = {
77
- "K": 1024,
78
- "KB": 1024,
79
- "M": 1024**2,
80
- "MB": 1024**2,
81
- "G": 1024**3,
82
- "GB": 1024**3,
83
- "T": 1024**4,
84
- "TB": 1024**4,
85
- }
74
+ unit = unit[0].upper()
75
+
76
+ byte_scale = 1024 ** (ByteScale.from_prefix_letter(unit).value)
86
77
 
87
78
  # Default to bytes if no specific unit multiplier, or assume B
88
- return int(number * units.get(unit, 1))
79
+ return int(number * byte_scale)
89
80
 
90
81
  def convert(self, value: click.Option, param: str, ctx: click.Context) -> int:
91
82
  """Convert a FileSizeParam to an int.
@@ -211,6 +202,13 @@ def handle_file(file_path: Path) -> None:
211
202
  click.echo(
212
203
  f"Skipped archive {file_path} because expected size {extracted_size} is greater than max_archive_size_bytes {config['max_archive_size_bytes']}"
213
204
  )
205
+ elif (
206
+ get_tracked_file_size() + extracted_size
207
+ > config["max_total_size_bytes"]
208
+ ):
209
+ click.echo(
210
+ f"Skipped archive {file_path} because expected size {extracted_size} + current tracked files {get_tracked_file_size()} is greater than max_total_size_bytes {config['max_total_size_bytes']}"
211
+ )
214
212
  elif compression_ratio < config["min_archive_ratio"]:
215
213
  click.echo(
216
214
  f"Skipped archive {file_path} because compression ratio {compression_ratio:.5f} is less than min_archive_ratio {config['min_archive_ratio']}"
@@ -372,3 +370,12 @@ def add_metadata_to_hash(hash: str, key: str, value: Any) -> None:
372
370
  value (Any): The metadata value.
373
371
  """
374
372
  tracked_files[hash]["metadata"][key] = value
373
+
374
+
375
+ def get_tracked_file_size() -> int:
376
+ """Get the total size of all tracked files.
377
+
378
+ Returns:
379
+ int: The total size in bytes.
380
+ """
381
+ return sum(tracked_files[hash].get("size", 0) for hash in tracked_files)
@@ -44,3 +44,12 @@ class ByteScale(Enum):
44
44
  def prefix_letter(self, value: str) -> None:
45
45
  """Setter for prefix letter."""
46
46
  self._prefix_letter = value
47
+
48
+ @staticmethod
49
+ def from_prefix_letter(prefix_letter: str) -> Self: # type: ignore[misc]
50
+ """Static method to look up from a prefix_letter."""
51
+ for member in ByteScale:
52
+ if member.prefix_letter == prefix_letter.upper():
53
+ return member
54
+ msg = f"'{prefix_letter}' is not a valid byte scale prefix letter."
55
+ raise ValueError(msg)
File without changes