archive-extractor 0.1.3__py3-none-any.whl → 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,118 @@
1
+ """Archive Extractor - Recursively extract ZIP and 7z archives.
2
+
3
+ CLI Usage:
4
+ archive-extractor /path/to/search
5
+ archive-extractor /path/to/search --passwords passwords.txt
6
+
7
+ Library Usage:
8
+ from archive_extractor import extract_archives
9
+
10
+ # Extract all archives in a directory
11
+ results = extract_archives("/path/to/search")
12
+
13
+ # Extract a single archive
14
+ results = extract_archives("/path/to/archive.zip")
15
+
16
+ # With passwords
17
+ results = extract_archives("/path/to/search", passwords=["pass1", "pass2"])
18
+
19
+ # Custom output directory
20
+ results = extract_archives("/path/to/search", output_dir="/path/to/output")
21
+
22
+ # Silent mode (no progress bars)
23
+ results = extract_archives("/path/to/search", show_progress=False)
24
+ """
25
+
26
+ import argparse
27
+ import os
28
+
29
+ from .core import (
30
+ find_archive_files,
31
+ load_passwords,
32
+ extract_zip_archive,
33
+ extract_7z_archive,
34
+ )
35
+
36
+ __all__ = ["extract_archives"]
37
+
38
+
39
def extract_archives(
    path: str,
    output_dir: str | None = None,
    passwords: list[str] | None = None,
    show_progress: bool = True
) -> dict[str, int]:
    """Extract every archive discovered at ``path`` and report the outcome.

    Args:
        path: A single archive file, or a directory that is searched for archives.
        output_dir: Optional base directory. When given, each archive is
            extracted into ``output_dir/<archive name without extension>``;
            otherwise it is extracted next to itself, into a directory named
            after the archive minus its extension.
        passwords: Optional password candidates for encrypted archives.
        show_progress: When true, progress bars are requested from the
            extractors and a one-line summary is printed per archive.

    Returns:
        Mapping of archive path to the value returned by its extractor
        (a count, where -1 signals that extraction failed).
    """
    # Extension -> extractor; anything not listed here is ignored.
    extractors = {
        ".zip": extract_zip_archive,
        ".7z": extract_7z_archive,
    }
    outcome = {}

    for found in find_archive_files(path):
        stem, suffix = os.path.splitext(found)

        extractor = extractors.get(suffix.lower())
        if extractor is None:
            continue

        if output_dir:
            label = os.path.splitext(os.path.basename(found))[0]
            target = os.path.join(output_dir, label)
        else:
            target = stem

        extracted = extractor(found, target, passwords, show_progress)
        outcome[found] = extracted

        if not show_progress:
            continue
        if extracted >= 0:
            print(f"Extracted '{found}' to '{target}'.")
        else:
            print(f"Could not extract '{found}': no valid password found or archive is corrupt.")

    return outcome
85
+
86
+
87
def main():
    """Parse the command line and run the extraction (console-script entry point)."""
    cli = argparse.ArgumentParser(
        description="Recursively extract all files from .zip and .7z archives under a given path."
    )
    cli.add_argument("path", help="Root directory or file to search for .zip/.7z files")
    cli.add_argument(
        "--passwords",
        help="Path to a file containing passwords (one per line) to try for encrypted archives",
    )
    cli.add_argument(
        "--output-dir",
        help="Base directory for extraction output (default: sibling directory of each archive)",
    )
    cli.add_argument("--quiet", "-q", action="store_true", help="Suppress progress output")
    options = cli.parse_args()

    # The password file is only read when the option was actually supplied.
    candidates = None
    if options.passwords:
        candidates = load_passwords(options.passwords)

    extract_archives(
        options.path,
        output_dir=options.output_dir,
        passwords=candidates,
        show_progress=not options.quiet,
    )
@@ -0,0 +1,160 @@
1
+ """Core extraction logic for archive-extractor."""
2
+
3
+ import os
4
+ import re
5
+ import zipfile
6
+ import lzma
7
+
8
+ import py7zr
9
+ from tqdm import tqdm
10
+
11
+
12
def sanitize_filename(filename: str) -> str:
    """Flatten a name into a single safe file-name component.

    Path separators and other characters that are illegal in file names
    (``\\ / * ? : " < > |``) are each replaced by ``_``, so a path such as
    ``a/b.txt`` becomes ``a_b.txt`` rather than ``b.txt``. Every ``..``
    sequence is then removed.

    Args:
        filename: The filename to sanitize.

    Returns:
        The sanitized name. If nothing usable is left (the result would be
        empty or the ``.`` directory reference), ``"_"`` is returned instead.
    """
    cleaned = re.sub(r'[\\/*?:"<>|]', "_", filename)
    cleaned = cleaned.replace("..", "")
    # Separators are already gone at this point; basename is kept purely as a
    # second line of defence.
    cleaned = os.path.basename(cleaned)
    # Inputs like "", ".." or "..." would otherwise come back as "" or ".",
    # neither of which can be used as a file name.
    if cleaned in ("", "."):
        return "_"
    return cleaned
24
+
25
+
26
def find_archive_files(root_path: str):
    """Yield paths to all .zip and .7z files at or below ``root_path``.

    The extension test is case-insensitive. Paths are built from
    ``root_path`` as given, so they are only absolute when ``root_path`` is.

    Args:
        root_path: Directory to search recursively, or a single file path.

    Yields:
        ``root_path`` itself when it is an archive file; otherwise the path
        of each archive found while walking the directory tree. A file that
        is not an archive, or a path that does not exist, yields nothing.
    """
    archive_suffixes = ('.zip', '.7z')

    if os.path.isfile(root_path):
        if root_path.lower().endswith(archive_suffixes):
            yield root_path
        return

    for dirpath, _, filenames in os.walk(root_path):
        for fname in filenames:
            if fname.lower().endswith(archive_suffixes):
                yield os.path.join(dirpath, fname)
45
+
46
+
47
def load_passwords(password_file: str) -> list[str]:
    """Load password candidates from a text file, one per line.

    Leading and trailing whitespace is stripped from every line, and lines
    that are empty after stripping are skipped.

    Args:
        password_file: Path to a UTF-8 text file containing passwords.

    Returns:
        List of password strings in file order.
    """
    # 'utf-8-sig' reads plain UTF-8 unchanged but drops a leading byte-order
    # mark, which would otherwise stay attached to the first password.
    with open(password_file, 'r', encoding='utf-8-sig') as f:
        stripped_lines = (line.strip() for line in f)
        return [candidate for candidate in stripped_lines if candidate]
58
+
59
+
60
+ def extract_zip_archive(
61
+ zip_file: str,
62
+ output_dir: str,
63
+ passwords: list[str] | None = None,
64
+ show_progress: bool = True
65
+ ) -> int:
66
+ """Extract a ZIP archive to the specified directory.
67
+
68
+ Args:
69
+ zip_file: Path to the ZIP file.
70
+ output_dir: Directory to extract files to.
71
+ passwords: Optional list of passwords to try for encrypted archives.
72
+ show_progress: Whether to show tqdm progress bar.
73
+
74
+ Returns:
75
+ Number of files extracted, or -1 on failure.
76
+ """
77
+ if not os.path.exists(output_dir):
78
+ os.makedirs(output_dir)
79
+
80
+ with zipfile.ZipFile(zip_file, 'r') as zf:
81
+ members = zf.infolist()
82
+ extracted_count = 0
83
+
84
+ def extract_members(pwd_bytes=None):
85
+ nonlocal extracted_count
86
+ iterator = tqdm(members, desc=f"Extracting {os.path.basename(zip_file)}", leave=False) if show_progress else members
87
+ for member in iterator:
88
+ if member.is_dir():
89
+ continue
90
+ safe_member_path = os.path.normpath(member.filename)
91
+ if os.path.isabs(safe_member_path) or safe_member_path.startswith(".."):
92
+ continue
93
+ out_path = os.path.join(output_dir, safe_member_path)
94
+ out_dir = os.path.dirname(out_path)
95
+ if not os.path.exists(out_dir):
96
+ os.makedirs(out_dir)
97
+ with open(out_path, 'wb') as f:
98
+ f.write(zf.read(member, pwd_bytes))
99
+ extracted_count += 1
100
+
101
+ if not passwords:
102
+ try:
103
+ extract_members()
104
+ return extracted_count
105
+ except RuntimeError:
106
+ return -1
107
+ else:
108
+ for pwd in passwords:
109
+ extracted_count = 0
110
+ try:
111
+ extract_members(pwd.encode('utf-8'))
112
+ return extracted_count
113
+ except RuntimeError:
114
+ continue
115
+ except zipfile.BadZipFile:
116
+ continue
117
+ return -1
118
+
119
+
120
def extract_7z_archive(
    archive_file: str,
    output_dir: str,
    passwords: list[str] | None = None,
    show_progress: bool = True
) -> int:
    """Unpack a 7z archive into ``output_dir``.

    Args:
        archive_file: Path to the 7z file.
        output_dir: Destination directory; created when it does not exist.
        passwords: Optional password candidates, tried in the given order.
            Without candidates a single attempt is made with no password.
        show_progress: Accepted for signature parity with the ZIP extractor;
            not used here.

    Returns:
        The number of names the archive reports after a successful
        extraction, or -1 when every attempt raised an exception.
    """
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

    candidates = passwords if passwords else [None]

    for candidate in candidates:
        try:
            with py7zr.SevenZipFile(archive_file, mode='r', password=candidate) as archive:
                archive.extractall(path=output_dir)
                return len(archive.getnames())
        except Exception:
            # Any failure (password required or rejected, damaged archive,
            # LZMA decoding error, ...) just moves on to the next candidate.
            continue

    return -1
@@ -1,7 +1,7 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: archive-extractor
3
- Version: 0.1.3
4
- Summary: 📦 Recursively extract ZIP and 7z archives from directory trees, with password-cracking support
3
+ Version: 0.2.0
4
+ Summary: Recursively extract ZIP and 7z archives from directory trees, with password-cracking support
5
5
  Author-email: Tiago Silva <eng.tiago.silva@gmail.com>
6
6
  License-File: LICENSE
7
7
  Requires-Python: >=3.12
@@ -0,0 +1,7 @@
1
+ archive_extractor/__init__.py,sha256=TJK1mnnpytttyxk18NH3BPfknauDKP018mcSBSUMugM,3620
2
+ archive_extractor/core.py,sha256=0_7eD2JGyxHLrh5wk-FanNBIw5jpYii6BAkzy0P2Fyg,5017
3
+ archive_extractor-0.2.0.dist-info/METADATA,sha256=Pam1RoVoEa2lBZAZ_3eT4qZm09Zq13uAra0V2YwEFnA,304
4
+ archive_extractor-0.2.0.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
5
+ archive_extractor-0.2.0.dist-info/entry_points.txt,sha256=0OXNU4FRaPKLaOQFlKVwtXEv_hrZHvOmTPZf7zv1dBg,61
6
+ archive_extractor-0.2.0.dist-info/licenses/LICENSE,sha256=gTrdDdqFDu7VtWezebCYIHmebmc-XTVxqWTZGKvDux0,1068
7
+ archive_extractor-0.2.0.dist-info/RECORD,,
@@ -0,0 +1,2 @@
1
+ [console_scripts]
2
+ archive-extractor = archive_extractor:main
@@ -1,6 +0,0 @@
1
- main.py,sha256=qlje5PrHKnZyS1EIpD5aK2G26NdaCD0ZfHhX_Mk2kp4,5461
2
- archive_extractor-0.1.3.dist-info/METADATA,sha256=dUXegFwVTf3ffn2TKXbte9FDV2NKlDniaf7y-dZ_aqg,309
3
- archive_extractor-0.1.3.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
4
- archive_extractor-0.1.3.dist-info/entry_points.txt,sha256=IuJfvHFM_0ztmmO8RnQ1ZrnVAhNTorGmpRT6CwDkuyM,48
5
- archive_extractor-0.1.3.dist-info/licenses/LICENSE,sha256=gTrdDdqFDu7VtWezebCYIHmebmc-XTVxqWTZGKvDux0,1068
6
- archive_extractor-0.1.3.dist-info/RECORD,,
@@ -1,2 +0,0 @@
1
- [console_scripts]
2
- archive-extractor = main:main
main.py DELETED
@@ -1,131 +0,0 @@
1
- import os
2
- import re
3
- import argparse
4
- from tqdm import tqdm
5
- import zipfile
6
- import py7zr
7
- import lzma
8
-
9
def sanitize_filename(filename):
    """Flatten ``filename`` into one safe file-name component.

    Each of the characters ``\\ / * ? : " < > |`` is replaced by ``_`` (so
    directory parts are merged into the name, not dropped) and every ``..``
    sequence is removed. Returns ``"_"`` when nothing usable remains.
    """
    cleaned = re.sub(r'[\\/*?:"<>|]', "_", filename)
    cleaned = cleaned.replace("..", "")  # extra safety
    # Separators were replaced above, so basename is only a safety net here.
    cleaned = os.path.basename(cleaned)
    # "", ".." and "..." would otherwise collapse to "" or ".", which are not
    # valid file names.
    if cleaned in ("", "."):
        return "_"
    return cleaned
14
-
15
def find_archive_files(root_path):
    """Yield paths to all .zip and .7z files at or below root_path.

    root_path may be a directory, which is walked recursively, or a single
    file, which is yielded when it has an archive extension. The extension
    test is case-insensitive.
    """
    archive_suffixes = ('.zip', '.7z')

    # os.walk() silently yields nothing for a plain file, so a single archive
    # passed on the command line has to be handled explicitly.
    if os.path.isfile(root_path):
        if root_path.lower().endswith(archive_suffixes):
            yield root_path
        return

    for dirpath, _, filenames in os.walk(root_path):
        for fname in filenames:
            if fname.lower().endswith(archive_suffixes):
                yield os.path.join(dirpath, fname)
21
-
22
def load_passwords(password_file):
    """Load passwords from a file, one per line, stripping whitespace.

    Lines that are empty after stripping are skipped. The file is decoded as
    UTF-8; a leading byte-order mark is discarded so it cannot end up inside
    the first password.
    """
    with open(password_file, 'r', encoding='utf-8-sig') as f:
        stripped_lines = (line.strip() for line in f)
        return [candidate for candidate in stripped_lines if candidate]
26
-
27
def extract_zip(zip_file, output_dir, passwords=None, show_progress=True):
    """Extract a ZIP archive into output_dir and print the outcome.

    Directory entries are skipped, as are members whose normalized name is
    absolute or starts with "..". When passwords are given they are tried in
    order (UTF-8 encoded) until one extracts every member.

    Args:
        zip_file: Path to the ZIP file.
        output_dir: Directory to extract files to (created if missing).
        passwords: Optional list of password strings to try.
        show_progress: Whether to wrap the member loop in a tqdm progress bar.

    Returns:
        None. A success or failure line is printed instead.
    """
    os.makedirs(output_dir, exist_ok=True)

    def extract_members(zf, members, pwd_bytes=None):
        """Write every safe file member to disk and return how many were written."""
        written = 0
        iterator = tqdm(members, desc=f"Extracting {os.path.basename(zip_file)}", leave=False) if show_progress else members
        for member in iterator:
            if member.is_dir():
                continue
            safe_member_path = os.path.normpath(member.filename)
            if os.path.isabs(safe_member_path) or safe_member_path.startswith(".."):
                continue
            # Read (and decrypt) first: a wrong password must not leave an
            # empty output file behind.
            data = zf.read(member, pwd_bytes)
            out_path = os.path.join(output_dir, safe_member_path)
            os.makedirs(os.path.dirname(out_path), exist_ok=True)
            with open(out_path, 'wb') as f:
                f.write(data)
            written += 1
        return written

    # A single attempt without a password when no candidates were supplied.
    if passwords:
        attempts = [pwd.encode('utf-8') for pwd in passwords]
    else:
        attempts = [None]

    written = None  # stays None until one attempt runs to completion
    try:
        with zipfile.ZipFile(zip_file, 'r') as zf:
            members = zf.infolist()
            for pwd_bytes in attempts:
                try:
                    written = extract_members(zf, members, pwd_bytes)
                    break  # Stop trying passwords after success
                except (RuntimeError, zipfile.BadZipFile):
                    # Wrong or missing password, or a damaged member: try next
                    continue
    except zipfile.BadZipFile:
        # Not a readable ZIP archive at all; reported as a failure below.
        pass

    if written is None:
        print(f"❌ Could not extract '{zip_file}': no valid password found.")
    else:
        # Report the files actually written, not the raw member count, which
        # also includes directories and skipped entries.
        print(f"✅ Extracted {written} items to '{output_dir}'.")
76
-
77
def extract_7z(archive_file, output_dir, passwords=None):
    """Unpack a 7z archive into output_dir and print whether it worked.

    Without passwords a single attempt is made; otherwise each password is
    tried in order until one attempt extracts the archive. Every exception
    raised by an attempt is treated as "this attempt failed".
    """
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

    # One set of keyword arguments per attempt; the lone empty set is the
    # "no password" attempt.
    if passwords:
        attempts = [{"password": pwd} for pwd in passwords]
    else:
        attempts = [{}]

    succeeded = False
    for open_kwargs in attempts:
        try:
            with py7zr.SevenZipFile(archive_file, mode='r', **open_kwargs) as archive:
                archive.extractall(path=output_dir)
                succeeded = True
                break
        except Exception:
            # Covers a required or rejected password, a bad 7z file and LZMA
            # errors alike.
            continue

    if succeeded:
        print(f"✅ Extracted '{archive_file}' to '{output_dir}'.")
    else:
        print(f"❌ Could not extract '{archive_file}': no valid password found or archive is corrupt.")
106
-
107
def main():
    """Command-line entry point: extract every archive found under a path."""
    cli = argparse.ArgumentParser(
        description="Recursively extract all files from .zip and .7z archives under a given path."
    )
    cli.add_argument("path", help="Root directory or file to search for .zip/.7z files")
    cli.add_argument(
        "--passwords",
        help="Path to a file containing passwords (one per line) to try for encrypted archives",
    )
    options = cli.parse_args()

    # The password file is only read when the option was supplied.
    candidates = None
    if options.passwords:
        candidates = load_passwords(options.passwords)

    # Extension -> extractor; files with any other extension are ignored.
    extractors = {".zip": extract_zip, ".7z": extract_7z}

    for found in find_archive_files(options.path):
        # Each archive is unpacked into a sibling directory named after it.
        target_dir, suffix = os.path.splitext(found)
        extractor = extractors.get(suffix.lower())
        if extractor is not None:
            extractor(found, target_dir, candidates)


if __name__ == "__main__":
    main()