quick-md5check 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
md5check/__init__.py ADDED
@@ -0,0 +1,21 @@
1
+ """md5check: Multi-threaded MD5 checksum generator and verifier."""
2
+
3
+ __version__ = "1.0.0"
4
+
5
+ from md5check.core import (
6
+ compute_md5,
7
+ generate_hashes,
8
+ generate_directory,
9
+ parse_md5_file,
10
+ verify_directory,
11
+ verify_hashes,
12
+ )
13
+
14
+ __all__ = [
15
+ "compute_md5",
16
+ "generate_hashes",
17
+ "generate_directory",
18
+ "parse_md5_file",
19
+ "verify_directory",
20
+ "verify_hashes",
21
+ ]
md5check/__main__.py ADDED
@@ -0,0 +1,5 @@
1
+ """Allow running as `python -m md5check`."""
2
import sys

from md5check.cli import main

if __name__ == "__main__":
    # main() returns the process exit status (1 on invalid paths). Pass it
    # to sys.exit so `python -m md5check` reports failures to the shell,
    # matching the `sys.exit(main())` guard at the bottom of md5check/cli.py.
    sys.exit(main())
md5check/cli.py ADDED
@@ -0,0 +1,152 @@
1
+ """Command-line interface for md5check."""
2
+
3
+ import argparse
4
+ import os
5
+ import sys
6
+ from pathlib import Path
7
+ from typing import List, Optional
8
+
9
+ from md5check import __version__
10
+ from md5check.core import generate_directory, verify_directory
11
+
12
+
13
def _positive_int(value: str) -> int:
    """Convert *value* to an integer that is at least 1.

    Intended for use as an argparse ``type=`` callable.

    Args:
        value: Raw command-line text.

    Returns:
        The parsed integer.

    Raises:
        argparse.ArgumentTypeError: If *value* is not an integer, or is
            smaller than 1.
    """
    try:
        n = int(value)
    except ValueError:
        # The ValueError adds nothing beyond the message below, so suppress
        # the implicit exception context instead of leaking it to callers.
        raise argparse.ArgumentTypeError(f"invalid integer: {value!r}") from None
    if n < 1:
        raise argparse.ArgumentTypeError(f"must be >= 1, got {n}")
    return n
22
+
23
+
24
def build_parser() -> argparse.ArgumentParser:
    """Create the ``md5check`` parser with its ``generate`` and ``check`` commands."""
    default_jobs = os.cpu_count() or 1
    # Both sub-commands expose the same -j/--jobs option.
    jobs_options = {
        "type": _positive_int,
        "default": default_jobs,
        "help": f"Number of worker threads (default: {default_jobs})",
    }

    usage_examples = (
        "Examples:\n"
        " md5check generate ./myfiles -j 4 -o checksums.md5\n"
        " md5check check checksums.md5 -j 4\n"
    )
    parser = argparse.ArgumentParser(
        prog="md5check",
        description="Multi-threaded MD5 checksum generator and verifier.",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog=usage_examples,
    )
    parser.add_argument(
        "--version", action="version", version=f"%(prog)s {__version__}"
    )

    commands = parser.add_subparsers(dest="command", required=True)

    # generate: hash every file below a directory
    generate_cmd = commands.add_parser(
        "generate",
        aliases=["gen"],
        help="Generate MD5 checksums for all files in a directory",
        description=(
            "Scan a directory and compute MD5 checksums for every file "
            "using multiple threads."
        ),
    )
    generate_cmd.add_argument("directory", type=Path, help="Directory to scan")
    generate_cmd.add_argument(
        "-o", "--output", type=Path, help="Output file (default: stdout)"
    )
    generate_cmd.add_argument("-j", "--jobs", **jobs_options)
    generate_cmd.add_argument(
        "--no-recursive", action="store_true", help="Do not scan subdirectories"
    )

    # check: verify files against an existing checksum list
    check_cmd = commands.add_parser(
        "check",
        help="Verify MD5 checksums from a file",
        description=(
            "Read an md5sum-format file and verify every file's "
            "checksum using multiple threads."
        ),
    )
    check_cmd.add_argument(
        "md5file", type=Path, help="Path to the md5sum-format file"
    )
    check_cmd.add_argument("-j", "--jobs", **jobs_options)
    check_cmd.add_argument(
        "-C", "--directory",
        type=Path,
        default=None,
        help=(
            "Base directory for relative paths in the md5 file. "
            "Defaults to the md5 file's parent directory."
        ),
    )

    return parser
111
+
112
+
113
def main(argv: Optional[List[str]] = None) -> int:
    """Entry point for the ``md5check`` console script.

    Args:
        argv: Arguments to parse. ``None`` makes argparse read them from
            ``sys.argv``.

    Returns:
        The process exit status: ``1`` when the given directory or md5 file
        is invalid, or when ``verify_directory`` returns ``False``;
        otherwise ``0``.
    """
    args = build_parser().parse_args(argv)

    if args.command in ("generate", "gen"):
        directory = args.directory.resolve()
        if not directory.is_dir():
            print(f"Error: {directory} is not a directory or does not exist.", file=sys.stderr)
            return 1
        generate_directory(
            directory=directory,
            num_threads=args.jobs,
            recursive=not args.no_recursive,
            output=args.output,
        )
        return 0

    if args.command == "check":
        md5file = args.md5file.resolve()
        if not md5file.is_file():
            print(f"Error: {md5file} is not a file or does not exist.", file=sys.stderr)
            return 1
        # Relative paths in the md5 file are resolved against -C when given,
        # otherwise against the directory that contains the md5 file.
        if args.directory is not None:
            base_dir = args.directory.resolve()
        else:
            base_dir = md5file.parent.resolve()
        verified = verify_directory(
            md5_file=md5file,
            base_dir=base_dir,
            num_threads=args.jobs,
        )
        # Only an explicit False is treated as failure. A None result (no
        # status reported by verify_directory) keeps the exit status at 0.
        return 1 if verified is False else 0

    return 0
149
+
150
+
151
+ if __name__ == "__main__":
152
+ sys.exit(main())
md5check/core.py ADDED
@@ -0,0 +1,253 @@
1
+ """Core MD5 computation logic with multi-threading support."""
2
+
3
+ import hashlib
4
+ import os
5
+ from concurrent.futures import ThreadPoolExecutor, as_completed
6
+ from dataclasses import dataclass
7
+ from pathlib import Path
8
+ from typing import Iterator, List, Optional, Tuple
9
+
10
+ from tqdm import tqdm
11
+
12
CHUNK_SIZE = 64 * 1024  # 64 KB


def compute_md5(file_path: Path, chunk_size: int = CHUNK_SIZE) -> str:
    """Compute the MD5 hex digest of a single file.

    The file is read in blocks of *chunk_size* bytes so that large files are
    never loaded into memory at once.

    Args:
        file_path: File to hash.
        chunk_size: Number of bytes read per iteration (default 64 KB).

    Returns:
        The 32-character lowercase hexadecimal digest.

    Raises:
        ValueError: If *chunk_size* is smaller than 1.
        OSError: If the file cannot be opened or read.
    """
    if chunk_size < 1:
        # read(0) would return b"" immediately and yield the empty-input
        # digest; read(-1) would load the whole file at once.
        raise ValueError(f"chunk_size must be >= 1, got {chunk_size}")

    try:
        # MD5 is used here as a plain checksum. Declaring that lets it work
        # on builds where MD5 is blocked for security use (e.g. FIPS mode).
        h = hashlib.md5(usedforsecurity=False)
    except TypeError:
        # Python 3.8 does not know the usedforsecurity keyword.
        h = hashlib.md5()

    with open(file_path, "rb") as f:
        for chunk in iter(lambda: f.read(chunk_size), b""):
            h.update(chunk)
    return h.hexdigest()
28
+
29
+
30
def _collect_files(directory: Path, recursive: bool = True) -> List[Path]:
    """Return the regular files found in *directory*, sorted by path.

    With ``recursive=True`` the whole tree below *directory* is walked;
    otherwise only its direct children are considered.

    Raises:
        NotADirectoryError: If *directory* is not a directory.
    """
    if not directory.is_dir():
        raise NotADirectoryError(f"{directory} is not a directory")

    candidates = directory.rglob("*") if recursive else directory.iterdir()
    found = [entry for entry in candidates if entry.is_file()]
    # Sorting keeps the result independent of directory enumeration order.
    found.sort()
    return found
45
+
46
+
47
@dataclass
class Md5Entry:
    """One ``<hash> <path>`` record read from an md5sum-format file."""

    # Expected hex digest; trimmed and lower-cased on construction.
    expected_hash: str
    # Relative path as stored in the md5 file.
    filepath: str
    # Location on disk, filled in once the base directory is known.
    full_path: Optional[Path] = None

    def __post_init__(self):
        # Normalise so later comparisons against a lowercase hexdigest are
        # not affected by case or surrounding whitespace.
        trimmed = self.expected_hash.strip()
        self.expected_hash = trimmed.lower()
56
+
57
+
58
@dataclass
class VerifyResult:
    """Outcome of checking one file against its expected MD5."""

    # Path of the checked file, as it was listed in the md5 file.
    filepath: str
    # One of "OK", "FAILED" or "MISSING".
    status: str
    # Digest the md5 file said the file should have.
    expected: str
    # Digest actually computed; left empty when nothing was computed.
    actual: str = ""
65
+
66
+
67
def parse_md5_file(md5_path: Path, base_dir: Path) -> List[Md5Entry]:
    """Parse an md5sum-format file into a list of entries.

    Accepted line shapes (``<hash>`` is a 32-character token)::

        <hash> <filename>       single separator
        <hash>  <filename>      GNU text mode (two spaces)
        <hash> *<filename>      GNU binary mode; the ``*`` is a mode marker

    Blank lines and lines starting with ``#`` are ignored. Lines that do not
    consist of a 32-character token followed by a filename are skipped.

    Args:
        md5_path: The md5sum-format file to read.
        base_dir: Directory against which each listed path is resolved.

    Returns:
        One ``Md5Entry`` per accepted line, in file order, with ``full_path``
        set to the resolved location under *base_dir*.
    """
    entries: List[Md5Entry] = []
    # utf-8-sig drops a leading byte-order mark. Read as plain utf-8, the BOM
    # would become part of the first hash token, making it 33 characters long
    # and causing the first entry to be skipped.
    with open(md5_path, "r", encoding="utf-8-sig", errors="replace") as f:
        for raw_line in f:
            line = raw_line.strip()
            if not line or line.startswith("#"):
                continue
            parts = line.split(None, 1)
            if len(parts) != 2:
                continue
            hash_str, rel_path = parts
            if len(hash_str) != 32:
                continue
            # Whatever sits between the two fields. The line is stripped, so
            # rel_path is exactly its tail.
            separator = line[len(hash_str):len(line) - len(rel_path)]
            # GNU md5sum writes "<hash> *<name>" for binary mode: one
            # whitespace character, then "*" as a mode marker that is not
            # part of the name. After two or more separator characters a
            # leading "*" belongs to the filename and is kept.
            if len(separator) == 1 and len(rel_path) > 1 and rel_path.startswith("*"):
                rel_path = rel_path[1:]
            full = (base_dir / rel_path).resolve()
            entries.append(Md5Entry(
                expected_hash=hash_str,
                filepath=rel_path,
                full_path=full,
            ))
    return entries
93
+
94
+
95
def generate_hashes(
    paths: List[Path],
    num_threads: int,
    *,
    desc: str = "Generating MD5",
    unit: str = "file",
) -> Iterator[Tuple[Path, str]]:
    """Hash every path in *paths* on a pool of *num_threads* threads.

    Yields ``(path, hex_digest)`` pairs in completion order while advancing
    a tqdm progress bar labelled with *desc* and *unit*. When hashing a file
    raises, the second element is the text ``"ERROR: <exception>"`` instead
    of a digest.
    """
    with ThreadPoolExecutor(max_workers=num_threads) as pool:
        # Remember which path each future belongs to.
        pending = {}
        for file_path in paths:
            pending[pool.submit(compute_md5, file_path)] = file_path

        with tqdm(total=len(pending), desc=desc, unit=unit) as progress:
            for finished in as_completed(pending):
                source = pending[finished]
                try:
                    outcome = finished.result()
                except Exception as exc:
                    outcome = f"ERROR: {exc}"
                yield source, outcome
                progress.update(1)
120
+
121
+
122
def generate_directory(
    directory: Path,
    num_threads: int,
    *,
    recursive: bool = True,
    output: Optional[Path] = None,
) -> None:
    """Generate MD5 hashes for all files in a directory.

    Writes to *output* if given, otherwise prints to stdout, one
    ``<hash> <relpath>`` line per file with paths relative to *directory*.
    Lines are sorted by path so the result does not depend on thread
    scheduling.

    Two kinds of files are left out of the listing:

    * *output* itself, when it already exists inside the scanned tree. Its
      content is replaced by this call, so a hash of the old content would
      be stale immediately.
    * files that could not be hashed. ``generate_hashes`` reports those as a
      digest string starting with ``"ERROR: "``; they are printed to stderr
      instead of being written as if they were checksums.

    Args:
        directory: Directory to scan.
        num_threads: Number of worker threads.
        recursive: Whether subdirectories are scanned as well.
        output: Destination file, or ``None`` for stdout.
    """
    import sys

    files = _collect_files(directory, recursive=recursive)
    if output:
        output_resolved = output.resolve()
        files = [p for p in files if p.resolve() != output_resolved]
    if not files:
        print("No files found.")
        return

    # Results arrive in completion order; sort by path for stable output.
    results: List[Tuple[Path, str]] = sorted(
        generate_hashes(files, num_threads), key=lambda item: item[0]
    )

    lines: List[str] = []
    for path, digest in results:
        rel = path.relative_to(directory).as_posix()
        if digest.startswith("ERROR: "):
            print(f"{rel}: {digest}", file=sys.stderr)
            continue
        lines.append(f"{digest} {rel}")

    output_lines = "\n".join(lines)
    if output:
        # Every line, including the last, ends with a newline. Nothing is
        # written as content when no file could be hashed.
        output.write_text(output_lines + "\n" if lines else "", encoding="utf-8")
        print(f"MD5 checksums written to {output}")
    elif lines:
        print(output_lines)
160
+
161
+
162
def verify_hashes(
    entries: List[Md5Entry],
    num_threads: int,
    *,
    desc: str = "Verifying MD5",
    unit: str = "file",
) -> Iterator[VerifyResult]:
    """Check each entry's file against its expected digest.

    Work is spread over *num_threads* threads; one ``VerifyResult`` is
    yielded per entry, in completion order, while a tqdm progress bar
    labelled with *desc* and *unit* advances.

    Status values:
        ``"MISSING"`` - the entry has no ``full_path`` or it is not a file.
        ``"FAILED"``  - the digest differs, or hashing raised (``actual``
                        then holds the exception text).
        ``"OK"``      - the computed digest equals the expected one.
    """
    def _check(entry: Md5Entry) -> VerifyResult:
        target = entry.full_path
        if target is None or not target.is_file():
            return VerifyResult(
                filepath=entry.filepath,
                status="MISSING",
                expected=entry.expected_hash,
            )

        try:
            digest = compute_md5(target)
        except Exception as exc:
            return VerifyResult(
                filepath=entry.filepath,
                status="FAILED",
                expected=entry.expected_hash,
                actual=str(exc),
            )

        if digest == entry.expected_hash:
            verdict = "OK"
        else:
            verdict = "FAILED"
        return VerifyResult(
            filepath=entry.filepath,
            status=verdict,
            expected=entry.expected_hash,
            actual=digest,
        )

    with ThreadPoolExecutor(max_workers=num_threads) as pool:
        pending = [pool.submit(_check, entry) for entry in entries]
        with tqdm(total=len(pending), desc=desc, unit=unit) as progress:
            for finished in as_completed(pending):
                yield finished.result()
                progress.update(1)
204
+
205
+
206
def verify_directory(
    md5_file: Path,
    base_dir: Path,
    num_threads: int,
) -> bool:
    """Verify files listed in an md5sum file against their checksums.

    Prints a summary with OK / FAILED / MISSING counts, followed by details
    for every result that is not OK.

    Args:
        md5_file: The md5sum-format file to read.
        base_dir: Base directory for relative paths in *md5_file* (usually
            the directory containing the md5 file).
        num_threads: Number of worker threads.

    Returns:
        ``True`` when every listed file verified OK. ``False`` when at least
        one file failed or is missing, or when *md5_file* contains no valid
        entries (nothing was verified, so success cannot be claimed).
    """
    entries = parse_md5_file(md5_file, base_dir)
    if not entries:
        print("No valid MD5 entries found in", md5_file)
        return False

    ok_count = 0
    failed: List[VerifyResult] = []
    missing: List[VerifyResult] = []

    for result in verify_hashes(entries, num_threads):
        if result.status == "OK":
            ok_count += 1
        elif result.status == "FAILED":
            failed.append(result)
        else:
            # Every status other than OK/FAILED is reported as missing.
            missing.append(result)

    # Print summary
    total = len(entries)
    print(f"\n{'='*50}")
    print(f"Verification complete: {total} file(s)")
    print(f" OK: {ok_count}")
    print(f" FAILED: {len(failed)}")
    print(f" MISSING: {len(missing)}")
    print(f"{'='*50}")

    if failed:
        print("\n--- FAILED ---")
        for r in failed:
            print(f" {r.filepath}")
            print(f" expected: {r.expected}")
            print(f" actual: {r.actual}")

    if missing:
        print("\n--- MISSING ---")
        for r in missing:
            print(f" {r.filepath}")

    return not failed and not missing
@@ -0,0 +1,134 @@
1
+ Metadata-Version: 2.4
2
+ Name: quick_md5check
3
+ Version: 1.0.0
4
+ Summary: Multi-threaded MD5 checksum generator and verifier
5
+ Author: lg10is1
6
+ License: MIT
7
+ Project-URL: Homepage, https://github.com/lg10is1/quick_md5check
8
+ Project-URL: Repository, https://github.com/lg10is1/quick_md5check
9
+ Project-URL: BugTracker, https://github.com/lg10is1/quick_md5check/issues
10
+ Keywords: md5,checksum,multi-threaded,parallel
11
+ Classifier: Development Status :: 5 - Production/Stable
12
+ Classifier: Environment :: Console
13
+ Classifier: Intended Audience :: Developers
14
+ Classifier: License :: OSI Approved :: MIT License
15
+ Classifier: Operating System :: OS Independent
16
+ Classifier: Programming Language :: Python :: 3
17
+ Classifier: Programming Language :: Python :: 3.8
18
+ Classifier: Programming Language :: Python :: 3.9
19
+ Classifier: Programming Language :: Python :: 3.10
20
+ Classifier: Programming Language :: Python :: 3.11
21
+ Classifier: Programming Language :: Python :: 3.12
22
+ Classifier: Topic :: Utilities
23
+ Classifier: Topic :: System :: Archiving :: Backup
24
+ Classifier: Topic :: Security :: Cryptography
25
+ Requires-Python: >=3.8
26
+ Description-Content-Type: text/markdown
27
+ License-File: LICENSE
28
+ Requires-Dist: tqdm>=4.60.0
29
+ Dynamic: license-file
30
+ Dynamic: requires-python
31
+
32
+ # Quick_md5check
33
+
34
+ Multi-threaded MD5 checksum generator and verifier. Computes and verifies MD5 hashes for many files in parallel — significantly faster than single-threaded `md5sum` on multi-core systems.
35
+
36
+ ## Features
37
+
38
+ - **Multi-threaded** — configurable thread count via `-j`/`--jobs`
39
+ - **Generate mode** — scan a directory and compute MD5 for every file
40
+ - **Check mode** — verify files against an `md5sum`-format file
41
+ - **Cross-platform** — Windows, Linux, macOS
42
+ - **Standard output format** — compatible with GNU `md5sum` and `md5sum -c`
43
+ - **Progress bar** — real-time feedback via `tqdm`
44
+ - **Recursive scan** — optionally disable with `--no-recursive`
45
+
46
+ ## Installation
47
+
48
+ ### pip
49
+ ```bash
50
+ pip install quick_md5check
51
+ ```
52
+
53
+ ### conda
54
+ ```bash
55
+ conda install -c your-channel quick_md5check
56
+ ```
57
+
58
+ ### From source
59
+ ```bash
60
+ git clone https://github.com/lg10is1/quick_md5check.git
61
+ cd quick_md5check
62
+ pip install -e .
63
+ ```
64
+
65
+ ## Usage
66
+
67
+ ### Generate checksums
68
+ ```bash
69
+ # Use all available CPU cores
70
+ md5check generate ./myfiles -o checksums.md5
71
+
72
+ # Use 8 worker threads
73
+ md5check generate ./myfiles -j 8 -o checksums.md5
74
+
75
+ # Non-recursive (top-level files only)
76
+ md5check generate ./myfiles --no-recursive -o checksums.md5
77
+
78
+ # Print to stdout
79
+ md5check generate ./myfiles
80
+ ```
81
+
82
+ ### Verify checksums
83
+ ```bash
84
+ # Verify using a checksum file; relative paths are resolved against the file's parent directory
85
+ md5check check checksums.md5
86
+
87
+ # Use 8 worker threads
88
+ md5check check checksums.md5 -j 8
89
+
90
+ # Specify base directory for relative paths
91
+ md5check check checksums.md5 -C ./myfiles
92
+ ```
93
+
94
+ ### Compatible with GNU md5sum
95
+ ```bash
96
+ # Generate (GNU md5sum format)
97
+ md5sum myfiles/* > checksums.md5
98
+
99
+ # Verify with md5check
100
+ md5check check checksums.md5
101
+
102
+ # Generate with md5check, verify with GNU md5sum
103
+ md5check generate ./myfiles -o checksums.md5
104
+ md5sum -c checksums.md5
105
+ ```
106
+
107
+ ## Output format
108
+
109
+ Standard `md5sum` format — one entry per line:
110
+
111
+ ```
112
+ d41d8cd98f00b204e9800998ecf8427e path/to/empty.txt
113
+ 900150983cd24fb0d6963f7d28e17f72 path/to/hello.txt
114
+ ```
115
+
116
+ ## Development
117
+
118
+ ### Run from source
119
+ ```bash
120
+ python -m md5check generate ./tests/data -j 4
121
+ ```
122
+
123
+ ### Build
124
+ ```bash
125
+ # Install build dependencies
126
+ pip install build
127
+
128
+ # Build wheel and source distribution
129
+ python -m build
130
+ ```
131
+
132
+ ## License
133
+
134
+ MIT
@@ -0,0 +1,10 @@
1
+ md5check/__init__.py,sha256=-gGBCmwAPe7cGVLGxRPawxAXI-yl0sa28bus7qozCL4,395
2
+ md5check/__main__.py,sha256=lGYpT51iW7h_jaP6D3NYxGMlds5Rk9p_Blend1eE90U,114
3
+ md5check/cli.py,sha256=5OwlbqJ_cO4aceFppsGq0OpBVc9u4M21Kx5uApRlNR4,4146
4
+ md5check/core.py,sha256=0U7AVA8pOAITSE1NiDaebywgzcNAoIKilkYrdAHWfUo,7670
5
+ quick_md5check-1.0.0.dist-info/licenses/LICENSE,sha256=A864Jj0HEiav2-1Ws-KacJr8rZMW5Nnk3k1HK3JJnoI,1078
6
+ quick_md5check-1.0.0.dist-info/METADATA,sha256=co7ZNocN4_5adEoR3eHCSP9N1IN0PgHV9fodAVdOahQ,3442
7
+ quick_md5check-1.0.0.dist-info/WHEEL,sha256=aeYiig01lYGDzBgS8HxWXOg3uV61G9ijOsup-k9o1sk,91
8
+ quick_md5check-1.0.0.dist-info/entry_points.txt,sha256=9n-DRaHH_HkX_IfiulHB_Q2ZXO8oU6SNqEZaoXqseU8,47
9
+ quick_md5check-1.0.0.dist-info/top_level.txt,sha256=JdxmmsLyWEyK7etIfTE9ErRwOciQNbedqqTC_Cbpl6c,9
10
+ quick_md5check-1.0.0.dist-info/RECORD,,
@@ -0,0 +1,5 @@
1
+ Wheel-Version: 1.0
2
+ Generator: setuptools (82.0.1)
3
+ Root-Is-Purelib: true
4
+ Tag: py3-none-any
5
+
@@ -0,0 +1,2 @@
1
+ [console_scripts]
2
+ md5check = md5check.cli:main
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 md5check contributors
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1 @@
1
+ md5check