archive-extractor 0.1.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: archive-extractor
|
|
3
|
+
Version: 0.1.2
|
|
4
|
+
Summary: 📦 Recursively extract ZIP and 7z archives from directory trees, with password-cracking support
|
|
5
|
+
Author-email: Tiago Silva <eng.tiago.silva@gmail.com>
|
|
6
|
+
License-File: LICENSE
|
|
7
|
+
Requires-Python: >=3.12
|
|
8
|
+
Requires-Dist: py7zr
|
|
9
|
+
Requires-Dist: tqdm
|
|
@@ -0,0 +1,6 @@
|
|
|
1
|
+
main.py,sha256=qlje5PrHKnZyS1EIpD5aK2G26NdaCD0ZfHhX_Mk2kp4,5461
|
|
2
|
+
archive_extractor-0.1.2.dist-info/METADATA,sha256=gbaY-yt-buB2MFKhlTP9QkVm0PbcbftK74azno5wzAQ,309
|
|
3
|
+
archive_extractor-0.1.2.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
|
|
4
|
+
archive_extractor-0.1.2.dist-info/entry_points.txt,sha256=IuJfvHFM_0ztmmO8RnQ1ZrnVAhNTorGmpRT6CwDkuyM,48
|
|
5
|
+
archive_extractor-0.1.2.dist-info/licenses/LICENSE,sha256=gTrdDdqFDu7VtWezebCYIHmebmc-XTVxqWTZGKvDux0,1068
|
|
6
|
+
archive_extractor-0.1.2.dist-info/RECORD,,
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2025 Tiago Silva
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
main.py
ADDED
|
@@ -0,0 +1,131 @@
|
|
|
1
|
+
import os
|
|
2
|
+
import re
|
|
3
|
+
import argparse
|
|
4
|
+
from tqdm import tqdm
|
|
5
|
+
import zipfile
|
|
6
|
+
import py7zr
|
|
7
|
+
import lzma
|
|
8
|
+
|
|
9
|
+
def sanitize_filename(filename):
    """Return a single, safe path component derived from *filename*.

    Path separators and characters that are illegal on common filesystems
    (backslash, slash, ``*``, ``?``, ``:``, double quote, ``<``, ``>``, ``|``)
    are replaced with ``_``, every ``..`` sequence is removed, and any
    remaining directory part is dropped.

    Args:
        filename: The untrusted name to clean up.

    Returns:
        The cleaned name. Names that collapse to nothing usable (an empty
        string or ``"."``, e.g. for the inputs ``".."`` or ``"..."``) are
        returned as ``"_"`` so callers never receive an empty name or one
        that refers to the current directory.
    """
    cleaned = re.sub(r'[\\/*?:"<>|]', "_", filename)
    cleaned = cleaned.replace("..", "")  # defuse parent-directory references
    cleaned = os.path.basename(cleaned)
    if cleaned in ("", "."):
        return "_"
    return cleaned
|
|
14
|
+
|
|
15
|
+
def find_archive_files(root_path):
    """Yield paths to all .zip and .7z files at or under *root_path*.

    Args:
        root_path: A directory to search recursively, or the path of a
            single file.

    Yields:
        The path of each file whose name ends in ``.zip`` or ``.7z``
        (case-insensitive). When *root_path* is itself a file it is yielded
        unchanged if it has one of those extensions; ``os.walk`` alone would
        yield nothing for a file, silently ignoring it. A path that does not
        exist yields nothing.
    """
    archive_suffixes = ('.zip', '.7z')

    if os.path.isfile(root_path):
        file_name = os.path.basename(os.fspath(root_path))
        if file_name.lower().endswith(archive_suffixes):
            yield root_path
        return

    for dirpath, _, filenames in os.walk(root_path):
        for fname in filenames:
            if fname.lower().endswith(archive_suffixes):
                yield os.path.join(dirpath, fname)
|
|
21
|
+
|
|
22
|
+
def load_passwords(password_file):
    """Load candidate passwords from a text file, one per line.

    Args:
        password_file: Path to a UTF-8 encoded text file.

    Returns:
        A list of the non-blank lines, in file order, each with surrounding
        whitespace stripped.
    """
    # 'utf-8-sig' reads plain UTF-8 unchanged but drops a leading byte-order
    # mark, which strip() does not remove and which would otherwise become
    # part of the first password.
    with open(password_file, 'r', encoding='utf-8-sig') as f:
        candidates = (line.strip() for line in f)
        return [candidate for candidate in candidates if candidate]
|
|
26
|
+
|
|
27
|
+
def _resolve_zip_target(output_dir, member_name):
    """Return the absolute destination for a ZIP member, or None if unsafe.

    A member is unsafe when its resolved path is not strictly inside
    *output_dir* (absolute names, ``..`` traversal, another drive).
    """
    root = os.path.realpath(output_dir)
    target = os.path.realpath(os.path.join(root, member_name))
    try:
        contained = os.path.commonpath([root, target]) == root
    except ValueError:
        # commonpath raises when the two paths are on different drives.
        contained = False
    if not contained or target == root:
        return None
    return target


def _write_zip_members(zf, members, output_dir, desc, pwd=None, leave=True):
    """Stream every regular-file member of *zf* to disk; return the count written."""
    import shutil

    written = 0
    for member in tqdm(members, desc=desc, leave=leave):
        if member.is_dir():
            continue
        target = _resolve_zip_target(output_dir, member.filename)
        if target is None:
            continue  # entry would land outside output_dir
        os.makedirs(os.path.dirname(target), exist_ok=True)
        # Open the member first: a rejected password raises here, before any
        # output file has been created.
        with zf.open(member, pwd=pwd) as src:
            try:
                with open(target, 'wb') as dst:
                    shutil.copyfileobj(src, dst)
            except Exception:
                # Do not leave truncated or wrongly decrypted output behind.
                try:
                    os.remove(target)
                except OSError:
                    pass
                raise
        written += 1
    return written


def extract_zip(zip_file, output_dir, passwords=None):
    """Extract the files of a ZIP archive into *output_dir*.

    Directory entries are skipped, as are entries whose path would resolve
    outside *output_dir*. Failures that concern the archive itself (not a
    ZIP file, missing or wrong password, damaged data) are reported on
    stdout instead of being raised, so one bad archive does not stop a
    batch run.

    Args:
        zip_file: Path to the ZIP archive.
        output_dir: Directory to extract into; created if missing.
        passwords: Optional sequence of candidate passwords (``str``). Each
            one is tried for the whole archive until one succeeds. When
            empty or ``None`` the archive is extracted without a password.

    Returns:
        None. The outcome is printed.
    """
    import zlib

    os.makedirs(output_dir, exist_ok=True)
    desc = f"Extracting {os.path.basename(zip_file)}"
    # What zipfile raises for a missing/wrong password or damaged data: a wrong
    # password that slips past the header check surfaces as a CRC or
    # decompression error rather than RuntimeError.
    failures = (RuntimeError, zipfile.BadZipFile, zlib.error, lzma.LZMAError)

    try:
        zf = zipfile.ZipFile(zip_file, 'r')
    except zipfile.BadZipFile as exc:
        print(f"❌ Could not extract '{zip_file}': {exc}")
        return

    with zf:
        members = zf.infolist()
        if not passwords:
            try:
                written = _write_zip_members(zf, members, output_dir, desc)
            except failures as exc:
                print(f"❌ Could not extract '{zip_file}': {exc}")
                return
        else:
            written = None
            for pwd in passwords:
                try:
                    written = _write_zip_members(
                        zf, members, output_dir, desc,
                        pwd=pwd.encode('utf-8'), leave=False,
                    )
                except failures:
                    continue  # wrong password, try the next one
                break  # stop trying passwords after success
            if written is None:
                print(f"❌ Could not extract '{zip_file}': no valid password found.")
                return

    print(f"✅ Extracted {written} items to '{output_dir}'.")
|
|
76
|
+
|
|
77
|
+
def extract_7z(archive_file, output_dir, passwords=None):
    """Extract a 7z archive into *output_dir*.

    Extraction is best-effort: any error raised while opening or extracting
    is caught, and the last one is included in the failure message instead
    of being discarded.

    Args:
        archive_file: Path to the 7z archive.
        output_dir: Directory to extract into; created if missing.
        passwords: Optional sequence of candidate passwords. Each one is
            tried in order until extraction succeeds. When empty or ``None``
            a single attempt is made without a password.

    Returns:
        None. The outcome is printed.
    """
    os.makedirs(output_dir, exist_ok=True)

    # With no password list, make exactly one attempt without a password.
    attempts = passwords if passwords else [None]
    extracted = False
    last_error = None
    for pwd in attempts:
        try:
            with py7zr.SevenZipFile(archive_file, mode='r', password=pwd) as archive:
                archive.extractall(path=output_dir)
        except Exception as exc:
            # Deliberately broad, as before: a failed attempt must not abort
            # the run. Remember why it failed and move on.
            last_error = exc
            continue
        extracted = True
        break

    if not extracted:
        detail = ""
        if last_error is not None:
            detail = f" ({type(last_error).__name__}: {last_error})"
        print(f"❌ Could not extract '{archive_file}': no valid password found or archive is corrupt.{detail}")
    else:
        print(f"✅ Extracted '{archive_file}' to '{output_dir}'.")
|
|
106
|
+
|
|
107
|
+
def main():
    """Parse the command line and extract every .zip/.7z archive found.

    Reads the positional ``path`` argument and the optional ``--passwords``
    file, then hands each discovered archive to the extractor matching its
    extension. Each archive is extracted into a directory named after the
    archive path with its extension removed.
    """
    cli = argparse.ArgumentParser(
        description="Recursively extract all files from .zip and .7z archives under a given path."
    )
    cli.add_argument(
        "path",
        help="Root directory or file to search for .zip/.7z files"
    )
    cli.add_argument(
        "--passwords",
        help="Path to a file containing passwords (one per line) to try for encrypted archives"
    )
    options = cli.parse_args()

    passwords = None
    if options.passwords:
        passwords = load_passwords(options.passwords)

    # Map each supported extension to the function that extracts it.
    extractors = {".zip": extract_zip, ".7z": extract_7z}
    for archive_path in find_archive_files(options.path):
        target_dir, suffix = os.path.splitext(archive_path)
        extractor = extractors.get(suffix.lower())
        if extractor is not None:
            extractor(archive_path, target_dir, passwords)
|
|
129
|
+
|
|
130
|
+
if __name__ == "__main__":
    # Run the command-line interface only when executed as a script,
    # not when the module is imported.
    main()
|