archive-extractor 0.1.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,9 @@
1
+ Metadata-Version: 2.4
2
+ Name: archive-extractor
3
+ Version: 0.1.2
4
+ Summary: 📦 Recursively extract ZIP and 7z archives from directory trees, with password-cracking support
5
+ Author-email: Tiago Silva <eng.tiago.silva@gmail.com>
6
+ License-File: LICENSE
7
+ Requires-Python: >=3.12
8
+ Requires-Dist: py7zr
9
+ Requires-Dist: tqdm
@@ -0,0 +1,6 @@
1
+ main.py,sha256=qlje5PrHKnZyS1EIpD5aK2G26NdaCD0ZfHhX_Mk2kp4,5461
2
+ archive_extractor-0.1.2.dist-info/METADATA,sha256=gbaY-yt-buB2MFKhlTP9QkVm0PbcbftK74azno5wzAQ,309
3
+ archive_extractor-0.1.2.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
4
+ archive_extractor-0.1.2.dist-info/entry_points.txt,sha256=IuJfvHFM_0ztmmO8RnQ1ZrnVAhNTorGmpRT6CwDkuyM,48
5
+ archive_extractor-0.1.2.dist-info/licenses/LICENSE,sha256=gTrdDdqFDu7VtWezebCYIHmebmc-XTVxqWTZGKvDux0,1068
6
+ archive_extractor-0.1.2.dist-info/RECORD,,
@@ -0,0 +1,4 @@
1
+ Wheel-Version: 1.0
2
+ Generator: hatchling 1.28.0
3
+ Root-Is-Purelib: true
4
+ Tag: py3-none-any
@@ -0,0 +1,2 @@
1
+ [console_scripts]
2
+ archive-extractor = main:main
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2025 Tiago Silva
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
main.py ADDED
@@ -0,0 +1,131 @@
1
+ import os
2
+ import re
3
+ import argparse
4
+ from tqdm import tqdm
5
+ import zipfile
6
+ import py7zr
7
+ import lzma
8
+
9
def sanitize_filename(filename):
    """Return *filename* reduced to a single, separator-free name.

    Each path separator and each of the characters ``* ? : " < > |`` is
    replaced by an underscore, every ".." pair is then removed, and the
    basename of what remains is returned.
    """
    # Map every forbidden character to "_" in one pass over the string.
    forbidden = '\\/*?:"<>|'
    underscored = filename.translate({ord(ch): "_" for ch in forbidden})
    # Drop parent-directory markers as an extra precaution.
    without_dot_pairs = underscored.replace("..", "")
    return os.path.basename(without_dot_pairs)
14
+
15
def find_archive_files(root_path):
    """Yield paths to all .zip and .7z files at or under root_path.

    Args:
        root_path: A directory to walk recursively, or the path of a single
            archive file.

    Yields:
        The path of each file whose name ends in ``.zip`` or ``.7z``
        (case-insensitive). When root_path is itself such a file, it is
        yielded unchanged. A non-archive file or a missing path yields
        nothing.
    """
    archive_suffixes = ('.zip', '.7z')

    # os.walk() yields nothing for a regular file, so a single archive passed
    # on the command line has to be handled explicitly.
    if os.path.isfile(root_path):
        if os.path.basename(root_path).lower().endswith(archive_suffixes):
            yield root_path
        return

    for dirpath, _, filenames in os.walk(root_path):
        for fname in filenames:
            if fname.lower().endswith(archive_suffixes):
                yield os.path.join(dirpath, fname)
21
+
22
def load_passwords(password_file):
    """Load candidate passwords from a text file, one per line.

    Args:
        password_file: Path to a UTF-8 encoded text file.

    Returns:
        A list of the non-blank lines, in file order, with surrounding
        whitespace stripped.
    """
    # 'utf-8-sig' reads plain UTF-8 too, but also drops a leading byte-order
    # mark; str.strip() does not remove it, so it would otherwise stay glued
    # to the first password.
    with open(password_file, 'r', encoding='utf-8-sig') as f:
        return [pwd for line in f if (pwd := line.strip())]
26
+
27
def _zip_member_target(output_dir, member_name):
    """Return the destination path for a ZIP member, or None if it is unsafe."""
    rel_path = os.path.normpath(member_name)
    if rel_path == os.curdir:
        return None
    # Absolute, rooted or drive-qualified names would make os.path.join()
    # discard output_dir entirely (os.path.isabs alone does not cover every
    # such form on Windows).
    if (
        os.path.isabs(rel_path)
        or rel_path.startswith(os.sep)
        or os.path.splitdrive(rel_path)[0]
    ):
        return None
    # Compare whole path components so that a legitimate name such as
    # "..notes.txt" is not mistaken for a parent-directory escape.
    if rel_path == os.pardir or rel_path.startswith(os.pardir + os.sep):
        return None
    return os.path.join(output_dir, rel_path)


def _extract_zip_members(zf, members, output_dir, label, pwd=None):
    """Write every safe file member of zf below output_dir; return the count."""
    written = 0
    # The progress bar is kept on screen only for the password-less run.
    for member in tqdm(members, desc=label, leave=pwd is None):
        if member.is_dir():
            continue
        out_path = _zip_member_target(output_dir, member.filename)
        if out_path is None:
            continue
        # Decrypt/decompress before touching the disk, so a rejected password
        # does not leave an empty output file behind.
        data = zf.read(member, pwd)
        os.makedirs(os.path.dirname(out_path), exist_ok=True)
        with open(out_path, 'wb') as f:
            f.write(data)
        written += 1
    return written


def extract_zip(zip_file, output_dir, passwords=None):
    """Extract a ZIP archive into output_dir, optionally trying passwords.

    Directory entries and members whose names would resolve outside
    output_dir are skipped. A failure to read the archive is reported on
    stdout instead of being raised, so one bad archive does not abort a
    batch run; errors while writing to disk still propagate.

    Args:
        zip_file: Path to the ZIP archive.
        output_dir: Directory to extract into; created if missing.
        passwords: Optional list of password strings. Each one is tried in
            order for the whole archive until extraction succeeds. When
            empty or None, the archive is read without a password.
    """
    import zlib  # local import: only needed for the exception type below

    os.makedirs(output_dir, exist_ok=True)
    label = f"Extracting {os.path.basename(zip_file)}"
    # A single None attempt means "read without a password".
    attempts = [pwd.encode('utf-8') for pwd in passwords] if passwords else [None]

    written = None
    try:
        with zipfile.ZipFile(zip_file, 'r') as zf:
            members = zf.infolist()
            for pwd in attempts:
                try:
                    written = _extract_zip_members(zf, members, output_dir, label, pwd)
                except (RuntimeError, zipfile.BadZipFile, zlib.error):
                    # Missing/wrong password or undecodable data: a wrong
                    # password that slips past the quick check shows up as a
                    # CRC or decompression error. Try the next candidate.
                    continue
                break  # Stop trying passwords after success
    except zipfile.BadZipFile:
        print(f"❌ Could not extract '{zip_file}': not a valid ZIP archive.")
        return

    if written is not None:
        print(f"✅ Extracted {written} items to '{output_dir}'.")
    elif passwords:
        print(f"❌ Could not extract '{zip_file}': no valid password found.")
    else:
        print(f"❌ Could not extract '{zip_file}': a password is required or the archive is corrupt.")
76
+
77
def extract_7z(archive_file, output_dir, passwords=None):
    """Extract a 7z archive into output_dir, optionally trying passwords.

    Any exception raised while opening or extracting is swallowed and counts
    as a failed attempt. The outcome is reported on stdout; nothing is
    returned.

    Args:
        archive_file: Path to the .7z archive.
        output_dir: Directory to extract into; created if it does not exist.
        passwords: Optional list of password strings tried in order. When
            empty or None, one attempt is made without a password.
    """
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

    # A lone None candidate leaves the password unset, which matches the
    # default of the `password` keyword.
    candidates = passwords if passwords else [None]

    extracted = False
    for candidate in candidates:
        try:
            with py7zr.SevenZipFile(archive_file, mode='r', password=candidate) as archive:
                archive.extractall(path=output_dir)
                extracted = True
                break
        except Exception:
            # Best effort: a missing/wrong password, a corrupt archive and
            # any other error are all treated as "this attempt failed".
            continue

    if extracted:
        print(f"✅ Extracted '{archive_file}' to '{output_dir}'.")
    else:
        print(f"❌ Could not extract '{archive_file}': no valid password found or archive is corrupt.")
106
+
107
def main():
    """Command-line entry point.

    Parses the positional ``path`` and the optional ``--passwords`` file,
    then extracts every archive found under ``path`` into a directory named
    after the archive with its extension removed.
    """
    parser = argparse.ArgumentParser(
        description="Recursively extract all files from .zip and .7z archives under a given path."
    )
    parser.add_argument(
        "path",
        help="Root directory or file to search for .zip/.7z files"
    )
    parser.add_argument(
        "--passwords",
        help="Path to a file containing passwords (one per line) to try for encrypted archives"
    )
    args = parser.parse_args()

    passwords = None
    if args.passwords:
        passwords = load_passwords(args.passwords)

    # Pick the extractor from the lower-cased file extension.
    extractors = {".zip": extract_zip, ".7z": extract_7z}
    for archive_path in find_archive_files(args.path):
        target_dir, suffix = os.path.splitext(archive_path)
        extractor = extractors.get(suffix.lower())
        if extractor is not None:
            extractor(archive_path, target_dir, passwords)


if __name__ == "__main__":
    main()