evolver-tools 1.4.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- evolver_tools/__init__.py +2 -0
- evolver_tools/__main__.py +3 -0
- evolver_tools/cli.py +89 -0
- evolver_tools/vendor/b64/__init__.py +2 -0
- evolver_tools/vendor/b64/b64.py +176 -0
- evolver_tools/vendor/cal_tool/__init__.py +1 -0
- evolver_tools/vendor/cal_tool/cli.py +234 -0
- evolver_tools/vendor/chart_cli/__init__.py +444 -0
- evolver_tools/vendor/chart_cli/__main__.py +3 -0
- evolver_tools/vendor/colors/__init__.py +5 -0
- evolver_tools/vendor/colors/__main__.py +97 -0
- evolver_tools/vendor/csv_stats/__init__.py +5 -0
- evolver_tools/vendor/csv_stats/__main__.py +4 -0
- evolver_tools/vendor/csv_stats/analyzer.py +258 -0
- evolver_tools/vendor/csv_stats/cli.py +45 -0
- evolver_tools/vendor/dirsize/__init__.py +183 -0
- evolver_tools/vendor/envcheck/__init__.py +426 -0
- evolver_tools/vendor/ff/__init__.py +427 -0
- evolver_tools/vendor/ff/__main__.py +3 -0
- evolver_tools/vendor/find_dups/__init__.py +7 -0
- evolver_tools/vendor/find_dups/cli.py +392 -0
- evolver_tools/vendor/hashsum/__init__.py +211 -0
- evolver_tools/vendor/hashsum/__main__.py +5 -0
- evolver_tools/vendor/http_live/__init__.py +265 -0
- evolver_tools/vendor/http_live/__main__.py +2 -0
- evolver_tools/vendor/ipinfo/__init__.py +3 -0
- evolver_tools/vendor/ipinfo/__main__.py +30 -0
- evolver_tools/vendor/jq_lite/__init__.py +257 -0
- evolver_tools/vendor/jq_lite/__main__.py +5 -0
- evolver_tools/vendor/json2csv/__init__.py +3 -0
- evolver_tools/vendor/json2csv/__main__.py +82 -0
- evolver_tools/vendor/jsonql/__init__.py +326 -0
- evolver_tools/vendor/jsonql/__main__.py +5 -0
- evolver_tools/vendor/license_cli/__init__.py +1 -0
- evolver_tools/vendor/license_cli/__main__.py +4 -0
- evolver_tools/vendor/license_cli/cli.py +289 -0
- evolver_tools/vendor/markdown_check/__init__.py +211 -0
- evolver_tools/vendor/nb/__init__.py +319 -0
- evolver_tools/vendor/nb/__main__.py +3 -0
- evolver_tools/vendor/passgen/__init__.py +224 -0
- evolver_tools/vendor/portcheck/__init__.py +2 -0
- evolver_tools/vendor/portcheck/__main__.py +66 -0
- evolver_tools/vendor/project_doctor/__init__.py +412 -0
- evolver_tools/vendor/project_doctor/__main__.py +3 -0
- evolver_tools/vendor/ren/__init__.py +283 -0
- evolver_tools/vendor/ren/__main__.py +3 -0
- evolver_tools/vendor/siege_lite/__init__.py +250 -0
- evolver_tools/vendor/siege_lite/__main__.py +3 -0
- evolver_tools/vendor/smellfinder/__init__.py +376 -0
- evolver_tools/vendor/smellfinder/__main__.py +3 -0
- evolver_tools/vendor/sqlite_cli/__init__.py +326 -0
- evolver_tools/vendor/sqlite_cli/__main__.py +5 -0
- evolver_tools/vendor/sysmon/__init__.py +299 -0
- evolver_tools/vendor/sysmon/__main__.py +3 -0
- evolver_tools/vendor/timer/__init__.py +127 -0
- evolver_tools/vendor/treedir/__init__.py +2 -0
- evolver_tools/vendor/treedir/__main__.py +128 -0
- evolver_tools/vendor/urlparse_tool/__init__.py +3 -0
- evolver_tools/vendor/urlparse_tool/cli.py +212 -0
- evolver_tools/vendor/web_summary/__init__.py +341 -0
- evolver_tools/vendor/web_summary/__main__.py +3 -0
- evolver_tools/vendor/wordcount/__init__.py +2 -0
- evolver_tools/vendor/wordcount/__main__.py +101 -0
- evolver_tools-1.4.0.dist-info/METADATA +107 -0
- evolver_tools-1.4.0.dist-info/RECORD +69 -0
- evolver_tools-1.4.0.dist-info/WHEEL +5 -0
- evolver_tools-1.4.0.dist-info/entry_points.txt +34 -0
- evolver_tools-1.4.0.dist-info/licenses/LICENSE +21 -0
- evolver_tools-1.4.0.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,392 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""find-dups — Find duplicate files by SHA256 content hash."""
|
|
3
|
+
|
|
4
|
+
from __future__ import annotations
|
|
5
|
+
|
|
6
|
+
import argparse
|
|
7
|
+
import hashlib
|
|
8
|
+
import os
|
|
9
|
+
import fnmatch
|
|
10
|
+
import sys
|
|
11
|
+
import time
|
|
12
|
+
|
|
13
|
+
# ── Progress helper ──────────────────────────────────────────────────────────
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
def _human_size(n: int) -> str:
|
|
17
|
+
"""Format a byte count in human-readable form."""
|
|
18
|
+
for unit in ("B", "KiB", "MiB", "GiB", "TiB"):
|
|
19
|
+
if abs(n) < 1024:
|
|
20
|
+
return f"{n}{unit}"
|
|
21
|
+
n //= 1024
|
|
22
|
+
return f"{n}PiB"
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
class Progress:
    """Throttled, dependency-free status line written to standard error.

    The ``tick_*`` methods bump the scanned / hashed / error counters.  Each
    bump may redraw the status line, but never more often than once per
    *interval* seconds.  ``finish`` prints the final totals on their own line.
    """

    def __init__(self, interval: float = 0.25) -> None:
        # Reference point for the files-per-second rate and the total duration.
        self._start = time.time()
        # Minimum number of seconds between two redraws.
        self._interval = interval
        # Time of the previous redraw.
        self._last = 0.0
        self._scanned = 0
        self._hashed = 0
        self._errors = 0

    def tick_scanned(self) -> None:
        """Count one more examined file and maybe redraw."""
        self._scanned += 1
        self._maybe_print()

    def tick_hashed(self) -> None:
        """Count one more successfully hashed file and maybe redraw."""
        self._hashed += 1
        self._maybe_print()

    def tick_error(self) -> None:
        """Count one more failure and maybe redraw."""
        self._errors += 1
        self._maybe_print()

    def _counts(self) -> str:
        """Return the counter part shared by the live line and the final line."""
        return (
            f" scanned {self._scanned:,} | hashed {self._hashed:,} "
            f"| errors {self._errors:,} | "
        )

    def _maybe_print(self) -> None:
        """Redraw the status line unless the previous redraw was too recent."""
        now = time.time()
        if now - self._last < self._interval:
            return
        self._last = now

        elapsed = now - self._start
        if elapsed > 0:
            rate = self._scanned / elapsed
        else:
            rate = 0

        # The trailing carriage return lets the next redraw overwrite this line.
        line = f"{self._counts()}{rate:.0f} files/s \r"
        print(line, end="", file=sys.stderr, flush=True)

    def finish(self) -> None:
        """Print the final counters and the total elapsed time."""
        elapsed = time.time() - self._start
        line = f"{self._counts()}{elapsed:.1f}s total "
        print(line, file=sys.stderr, flush=True)
|
|
71
|
+
|
|
72
|
+
|
|
73
|
+
# ── Core logic ───────────────────────────────────────────────────────────────
|
|
74
|
+
|
|
75
|
+
|
|
76
|
+
def _sha256_file(path: str, buffer_size: int = 2**20, *, progress: Progress | None = None) -> str | None:
|
|
77
|
+
"""Return hex SHA256 digest of *path*, or ``None`` on error."""
|
|
78
|
+
h = hashlib.sha256()
|
|
79
|
+
try:
|
|
80
|
+
with open(path, "rb") as f:
|
|
81
|
+
while True:
|
|
82
|
+
chunk = f.read(buffer_size)
|
|
83
|
+
if not chunk:
|
|
84
|
+
break
|
|
85
|
+
h.update(chunk)
|
|
86
|
+
except (OSError, PermissionError) as exc:
|
|
87
|
+
if progress:
|
|
88
|
+
progress.tick_error()
|
|
89
|
+
print(f" [error] {path}: {exc}", file=sys.stderr)
|
|
90
|
+
return None
|
|
91
|
+
if progress:
|
|
92
|
+
progress.tick_hashed()
|
|
93
|
+
return h.hexdigest()
|
|
94
|
+
|
|
95
|
+
|
|
96
|
+
def _walk_files(
|
|
97
|
+
root: str,
|
|
98
|
+
*,
|
|
99
|
+
min_size: int = 0,
|
|
100
|
+
max_size: int | None = None,
|
|
101
|
+
excludes: list[str] | None = None,
|
|
102
|
+
progress: Progress | None = None,
|
|
103
|
+
) -> list[str]:
|
|
104
|
+
"""Recursively collect file paths under *root* matching filters."""
|
|
105
|
+
excludes = excludes or []
|
|
106
|
+
files: list[str] = []
|
|
107
|
+
|
|
108
|
+
for dirpath, dirnames, filenames in os.walk(root, followlinks=False):
|
|
109
|
+
# Skip excluded directories eagerly
|
|
110
|
+
dirnames[:] = [
|
|
111
|
+
d
|
|
112
|
+
for d in dirnames
|
|
113
|
+
if not any(fnmatch.fnmatch(os.path.join(dirpath, d), pat) for pat in excludes)
|
|
114
|
+
]
|
|
115
|
+
|
|
116
|
+
for fname in filenames:
|
|
117
|
+
fpath = os.path.join(dirpath, fname)
|
|
118
|
+
|
|
119
|
+
# Check exclude patterns against full path
|
|
120
|
+
if any(fnmatch.fnmatch(fpath, pat) for pat in excludes):
|
|
121
|
+
if progress:
|
|
122
|
+
progress.tick_scanned()
|
|
123
|
+
continue
|
|
124
|
+
|
|
125
|
+
# Stat the file
|
|
126
|
+
try:
|
|
127
|
+
st = os.lstat(fpath)
|
|
128
|
+
except OSError:
|
|
129
|
+
if progress:
|
|
130
|
+
progress.tick_scanned()
|
|
131
|
+
progress.tick_error()
|
|
132
|
+
continue
|
|
133
|
+
|
|
134
|
+
# Skip non-regular files (symlinks, devices, sockets, etc.)
|
|
135
|
+
if not os.path.isfile(fpath):
|
|
136
|
+
if progress:
|
|
137
|
+
progress.tick_scanned()
|
|
138
|
+
continue
|
|
139
|
+
|
|
140
|
+
size = st.st_size
|
|
141
|
+
|
|
142
|
+
# Size filters
|
|
143
|
+
if size < min_size:
|
|
144
|
+
if progress:
|
|
145
|
+
progress.tick_scanned()
|
|
146
|
+
continue
|
|
147
|
+
if max_size is not None and size > max_size:
|
|
148
|
+
if progress:
|
|
149
|
+
progress.tick_scanned()
|
|
150
|
+
continue
|
|
151
|
+
|
|
152
|
+
if progress:
|
|
153
|
+
progress.tick_scanned()
|
|
154
|
+
|
|
155
|
+
files.append(fpath)
|
|
156
|
+
|
|
157
|
+
return files
|
|
158
|
+
|
|
159
|
+
|
|
160
|
+
def find_duplicates(
    paths: list[str],
    *,
    min_size: int = 0,
    max_size: int | None = None,
    excludes: list[str] | None = None,
    progress: bool = True,
) -> dict[str, list[str]]:
    """Group files with identical content by their SHA256 digest.

    Args:
        paths: Files and/or directories to examine.  Directories are walked
            recursively; anything that is not an existing file is handed to
            the directory walker.
        min_size: Ignore files smaller than this many bytes.
        max_size: Ignore files larger than this many bytes (``None`` = no
            limit).
        excludes: ``fnmatch`` patterns; matching paths are ignored.
        progress: When true, progress information is written to standard
            error.

    Returns:
        A dict mapping SHA256 hex digest -> list of file paths.  Only groups
        with **two or more** files are included.

    Each physical file is considered at most once.  Arguments may overlap
    (``/a`` and ``/a/b``) or be repeated; without de-duplication such a file
    would be reported as a duplicate of itself and could be removed by a
    caller that deletes every path but the first of a group.
    """
    prog = Progress() if progress else None
    patterns = excludes or []

    # Phase 1: collect candidate files, skipping ones already seen.
    all_files: list[str] = []
    seen: set[str] = set()

    def _add(path: str) -> None:
        # realpath normalises relative/absolute spellings and resolves links,
        # so two routes to the same file share one identity.
        identity = os.path.realpath(path)
        if identity not in seen:
            seen.add(identity)
            all_files.append(path)

    for p in paths:
        if not os.path.isfile(p):
            for found in _walk_files(
                p,
                min_size=min_size,
                max_size=max_size,
                excludes=excludes,
                progress=prog,
            ):
                _add(found)
            continue

        # Single file: the size and exclude rules still apply.
        if any(fnmatch.fnmatch(p, pat) for pat in patterns):
            continue
        try:
            size = os.lstat(p).st_size
        except OSError:
            # Best effort: the file vanished between the two checks.
            continue
        if size >= min_size and (max_size is None or size <= max_size):
            _add(p)

    prog2 = Progress() if progress else None
    if prog:
        print(f" Files to hash: {len(all_files):,}", file=sys.stderr)

    # Phase 2: hash every candidate; unreadable files are skipped.
    hash_map: dict[str, list[str]] = {}
    for fpath in all_files:
        digest = _sha256_file(fpath, progress=prog2)
        if digest is None:
            continue
        hash_map.setdefault(digest, []).append(fpath)

    if prog2:
        prog2.finish()

    # Phase 3: keep only groups with 2+ files.
    return {digest: group for digest, group in hash_map.items() if len(group) >= 2}
|
|
221
|
+
|
|
222
|
+
|
|
223
|
+
# ── CLI ──────────────────────────────────────────────────────────────────────
|
|
224
|
+
|
|
225
|
+
|
|
226
|
+
def _size_arg(value: str) -> int:
|
|
227
|
+
"""Parse a size argument like ``10``, ``1K``, ``2M``, ``1G``."""
|
|
228
|
+
value = value.strip().upper()
|
|
229
|
+
multipliers = {"K": 1024, "M": 1024**2, "G": 1024**3, "T": 1024**4}
|
|
230
|
+
if value[-1] in multipliers:
|
|
231
|
+
return int(value[:-1]) * multipliers[value[-1]]
|
|
232
|
+
return int(value)
|
|
233
|
+
|
|
234
|
+
|
|
235
|
+
def build_parser() -> argparse.ArgumentParser:
    """Construct the ``find-dups`` command-line parser.

    The parser takes one or more positional paths plus the ``--min-size``,
    ``--max-size``, ``--exclude`` (repeatable, stored as ``excludes``),
    ``--delete``, ``--quiet``/``-q`` and ``--no-color`` options.
    """
    examples = "\n".join(
        [
            "Examples:",
            " find-dups /home/user/Documents",
            " find-dups . --min-size 1M",
            " find-dups /data --exclude '*.tmp' --exclude '__pycache__/*' --delete",
            " find-dups /a /b --max-size 10K",
        ]
    )
    cli = argparse.ArgumentParser(
        prog="find-dups",
        description="Find duplicate files by SHA256 content hash.",
        epilog=examples,
        # Keep the line breaks of the examples as written.
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )
    add = cli.add_argument

    add("paths", nargs="+", help="One or more file or directory paths to scan")
    add(
        "--min-size",
        type=_size_arg,
        default=0,
        help='Minimum file size (e.g. "1K", "5M", "1G"). Default: 0',
    )
    add(
        "--max-size",
        type=_size_arg,
        default=None,
        help='Maximum file size (e.g. "10M", "500K"). Default: no limit',
    )
    add(
        "--exclude",
        action="append",
        default=[],
        dest="excludes",
        help='Glob patterns to exclude (can be repeated). E.g. --exclude "*.tmp"',
    )
    add(
        "--delete",
        action="store_true",
        help="Delete duplicate files, keeping only the first in each group",
    )
    add("--quiet", "-q", action="store_true", help="Suppress progress indicator")
    add("--no-color", action="store_true", help="Disable colored output")
    return cli
|
|
287
|
+
|
|
288
|
+
|
|
289
|
+
def _color(text: str, code: str, *, no_color: bool = False) -> str:
|
|
290
|
+
if no_color:
|
|
291
|
+
return text
|
|
292
|
+
codes = {
|
|
293
|
+
"green": "\033[92m",
|
|
294
|
+
"yellow": "\033[93m",
|
|
295
|
+
"red": "\033[91m",
|
|
296
|
+
"bold": "\033[1m",
|
|
297
|
+
"dim": "\033[2m",
|
|
298
|
+
"reset": "\033[0m",
|
|
299
|
+
}
|
|
300
|
+
return f"{codes.get(code, '')}{text}{codes['reset']}"
|
|
301
|
+
|
|
302
|
+
|
|
303
|
+
def main(argv: list[str] | None = None) -> int:
    """Run the ``find-dups`` command line.

    Args:
        argv: Argument list to parse; ``None`` makes argparse use
            ``sys.argv[1:]``.

    Returns:
        The process exit status (always ``0``).

    Duplicate groups are printed to standard output.  The banner, progress,
    summary and delete report go to standard error.  With ``--delete`` every
    file of a group except the first one is removed.
    """
    args = build_parser().parse_args(argv)

    def paint(text: str, code: str) -> str:
        # All styled output goes through here so that --no-color applies
        # everywhere, including the startup banner, which used to ignore it.
        return _color(text, code, no_color=args.no_color)

    print(paint("find-dups", "bold") + paint(" — scanning for duplicate files…", "dim"), file=sys.stderr)
    print(file=sys.stderr)

    duplicates = find_duplicates(
        args.paths,
        min_size=args.min_size,
        max_size=args.max_size,
        excludes=args.excludes or None,
        progress=not args.quiet,
    )

    print(file=sys.stderr)

    if not duplicates:
        print(paint(" No duplicate files found.", "green"))
        return 0

    total_dup_files = sum(len(group) for group in duplicates.values())
    total_wasted = 0

    for index, (digest, group) in enumerate(sorted(duplicates.items()), start=1):
        # All files of a group have the same content, so the size of the kept
        # (first) file stands for every copy.
        try:
            kept_size = os.path.getsize(group[0])
        except OSError:
            kept_size = 0
        wasted = kept_size * (len(group) - 1)
        total_wasted += wasted

        print(
            paint(f" Duplicate group {index}", "bold")
            + paint(f" [{digest[:12]}…]", "dim")
            + paint(f" ({len(group)} files, {_human_size(wasted)} wasted)", "yellow")
        )
        print(f" {'─' * 60}")
        for position, fpath in enumerate(group):
            prefix = paint(" ✓ kept", "green") if position == 0 else paint(" ✗ dup", "red")
            print(f"{prefix} {fpath}")
        print()

    print(
        paint(f" Found {len(duplicates)} duplicate groups", "bold")
        + paint(f" ({total_dup_files} files, {_human_size(total_wasted)} reclaimable)", "yellow"),
        file=sys.stderr,
    )

    # ── Delete mode ──────────────────────────────────────────────────────
    if args.delete:
        deleted_count = 0
        deleted_size = 0
        print(file=sys.stderr)
        print(paint(" — Delete mode enabled —", "bold"), file=sys.stderr)

        for group in duplicates.values():
            # group[0] is the copy that is kept; everything after it goes.
            for fpath in group[1:]:
                try:
                    sz = os.path.getsize(fpath)
                    os.remove(fpath)
                except OSError as exc:
                    print(paint(" ! error", "red") + f" {fpath}: {exc}", file=sys.stderr)
                else:
                    deleted_count += 1
                    deleted_size += sz
                    print(paint(" ✗ deleted", "red") + f" {fpath}")

        print(file=sys.stderr)
        print(
            paint(f" Removed {deleted_count} files", "green")
            + paint(f" ({_human_size(deleted_size)} reclaimed)", "yellow"),
            file=sys.stderr,
        )

    return 0
|
|
389
|
+
|
|
390
|
+
|
|
391
|
+
if __name__ == "__main__":
|
|
392
|
+
sys.exit(main())
|
|
@@ -0,0 +1,211 @@
|
|
|
1
|
+
"""hashsum: Zero-dependency checksum tool (MD5, SHA1, SHA256, SHA512, BLAKE2).
|
|
2
|
+
|
|
3
|
+
Pure Python stdlib implementation (hashlib). Works like sha256sum/md5sum/b2sum.
|
|
4
|
+
"""
|
|
5
|
+
|
|
6
|
+
import sys
|
|
7
|
+
import os
|
|
8
|
+
import hashlib
|
|
9
|
+
from pathlib import Path
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
__version__ = "1.0.0"
|
|
13
|
+
|
|
14
|
+
# Available algorithms mapped to their hashlib names
|
|
15
|
+
ALGORITHMS = {
|
|
16
|
+
"md5": hashlib.md5,
|
|
17
|
+
"sha1": hashlib.sha1,
|
|
18
|
+
"sha224": hashlib.sha224,
|
|
19
|
+
"sha256": hashlib.sha256,
|
|
20
|
+
"sha384": hashlib.sha384,
|
|
21
|
+
"sha512": hashlib.sha512,
|
|
22
|
+
"blake2b": hashlib.blake2b,
|
|
23
|
+
"blake2s": hashlib.blake2s,
|
|
24
|
+
"sha3_256": hashlib.sha3_256,
|
|
25
|
+
"sha3_512": hashlib.sha3_512,
|
|
26
|
+
}
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
CHUNK_SIZE = 65536 # 64KB
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
def hash_file(path: str, alg: str) -> tuple:
    """Hash one file and return ``(hexdigest, path)``.

    *alg* must be a key of ``ALGORITHMS``.  The file is read in pieces of
    ``CHUNK_SIZE`` bytes.  Errors from opening or reading the file are not
    caught here.
    """
    digest = ALGORITHMS[alg]()
    with open(path, "rb") as stream:
        # iter() with a sentinel stops at the first empty read (end of file).
        for block in iter(lambda: stream.read(CHUNK_SIZE), b""):
            digest.update(block)
    return digest.hexdigest(), path
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
def hash_stdin(alg: str) -> str:
    """Hash the bytes arriving on standard input and return the hex digest.

    *alg* must be a key of ``ALGORITHMS``.  Input is consumed from
    ``sys.stdin.buffer`` in pieces of ``CHUNK_SIZE`` bytes until it is
    exhausted.
    """
    digest = ALGORITHMS[alg]()
    source = sys.stdin.buffer
    for block in iter(lambda: source.read(CHUNK_SIZE), b""):
        digest.update(block)
    return digest.hexdigest()
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
def format_line(hexdigest: str, path: str, binary: bool) -> str:
    """Build one checksum output line: digest, separator, then path.

    With *binary* set the separator carries the ``*`` marker in front of the
    path; otherwise it is plain whitespace.
    """
    if binary:
        separator = " *"
    else:
        separator = " "
    return hexdigest + separator + path
|
|
59
|
+
|
|
60
|
+
|
|
61
|
+
def check_file(ck_path: str, alg: str = "sha256", strict: bool = False) -> tuple:
    """Verify the checksums listed in *ck_path*.

    Each non-blank line that does not start with ``#`` is expected to hold a
    hex digest, whitespace, an optional ``*`` binary marker and a file path.
    Lines without a path are ignored.

    Args:
        ck_path: Checksum list to read.
        alg: Name of the hash algorithm, passed on to ``hash_file``.
        strict: Accepted for interface compatibility; it currently has no
            effect.

    Returns:
        ``(passed, failed, missing)``, three lists of file paths.

    Raises:
        OSError: If *ck_path* or a listed file that exists cannot be read.
    """
    passed = []
    failed = []
    missing = []

    with open(ck_path) as f:
        for raw in f:
            line = raw.strip()
            if not line or line.startswith("#"):
                continue

            # Split once only: everything after the digest is the path, which
            # may itself contain spaces.
            fields = line.split(None, 1)
            if len(fields) < 2:
                continue
            exp_hex, target = fields

            # Drop the binary-mode marker so "HEX *name" refers to "name".
            if target.startswith("*"):
                target = target[1:]
            if not target:
                continue

            if not os.path.exists(target):
                missing.append(target)
                continue

            actual_hex, _ = hash_file(target, alg)
            # hexdigest() is lower-case; accept upper-case digests in the list.
            if actual_hex == exp_hex.lower():
                passed.append(target)
            else:
                failed.append(target)

    return passed, failed, missing
|
|
91
|
+
|
|
92
|
+
|
|
93
|
+
def main():
    """Command-line entry point for ``hashsum``.

    Modes, in order of precedence:

    * ``-l``: list the supported algorithms and their digest sizes.
    * ``-c FILE...``: verify the checksum lists; exits with status 1 when
      any entry failed, is missing, or a list could not be read.
    * no files: hash standard input.
    * otherwise: hash each named file; exits with status 1 if any file
      could not be hashed.

    The first positional argument is taken as the algorithm when it names
    one (``hashsum sha256 file.txt``).
    """
    import argparse

    parser = argparse.ArgumentParser(
        description="hashsum — Checksum calculator and verifier",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
hashsum file.iso # Default: SHA256
hashsum -a md5 file.zip # MD5
hashsum -a sha512 file.bin # SHA512
hashsum -a blake2b file.tar.gz # BLAKE2b
hashsum *.iso > checksums.sha256 # Generate checksum file
hashsum -c checksums.sha256 # Verify checksums
cat file.bin | hashsum # Hash from stdin
hashsum -a md5 -r file.txt # Just the hash (no filename)
"""
    )
    parser.add_argument("files", nargs="*", help="Files to hash (default: stdin)")
    parser.add_argument("-a", "--algorithm", default="sha256",
                        choices=sorted(ALGORITHMS.keys()),
                        help="Hash algorithm (default: sha256)")
    parser.add_argument("-c", "--check", action="store_true",
                        help="Read checksums from file and verify")
    parser.add_argument("-b", "--binary", action="store_true",
                        help="Binary mode marker (*) in output")
    parser.add_argument("-r", "--raw", action="store_true",
                        help="Output raw hash only (no filename)")
    parser.add_argument("-l", "--list", action="store_true",
                        help="List available algorithms")

    args = parser.parse_args()

    # Allow algorithm as first positional arg (e.g. `hashsum sha256 file.txt`)
    if args.files and args.files[0] in ALGORITHMS:
        args.algorithm = args.files[0]
        args.files = args.files[1:]

    # List algorithms
    if args.list:
        print("Available algorithms:")
        for name in sorted(ALGORITHMS):
            print(f" {name:<12} {hashlib.new(name).digest_size * 8} bits")
        return

    # Check mode
    if args.check:
        if not args.files:
            # Verifying from stdin is not implemented.  Silently draining the
            # input and reporting "0 passed" with exit status 0 would look
            # like a successful verification, so refuse explicitly.
            parser.error(
                "--check needs at least one checksum file "
                "(reading the list from standard input is not supported)"
            )

        total_pass = 0
        total_fail = 0
        total_miss = 0
        unreadable = 0
        for ck_file in args.files:
            try:
                passed, failed, missing = check_file(ck_file, args.algorithm)
            except OSError as e:
                # A list, or a file it names, could not be read: report it
                # and count the run as failed instead of crashing.
                print(f"hashsum: {ck_file}: {e}", file=sys.stderr)
                unreadable += 1
                continue
            for p in passed:
                print(f"{p}: OK")
            for f in failed:
                print(f"{f}: FAILED")
            for m in missing:
                print(f"{m}: MISSING")
            total_pass += len(passed)
            total_fail += len(failed)
            total_miss += len(missing)
            if not (passed or failed or missing):
                print(f"hashsum: {ck_file}: no valid checksum lines found", file=sys.stderr)

        if total_fail + total_miss > 0 or unreadable:
            print(f"\nChecksums: {total_pass} passed, {total_fail} failed, {total_miss} missing")
            sys.exit(1)
        else:
            print(f"\nChecksums: {total_pass} passed")
        return

    # Hash mode
    if not args.files:
        # Hash from stdin
        hexdigest = hash_stdin(args.algorithm)
        if args.raw:
            print(hexdigest)
        else:
            print(format_line(hexdigest, "-", args.binary))
        return

    # Hash files
    results = []
    errors = []
    for path in args.files:
        if not os.path.exists(path):
            errors.append(f"hashsum: {path}: No such file or directory")
            continue
        try:
            hexdigest, _ = hash_file(path, args.algorithm)
        except OSError as e:  # covers IOError and PermissionError
            errors.append(f"hashsum: {path}: {e}")
        else:
            results.append((hexdigest, path))

    # Errors are printed before the results.
    for err in errors:
        print(err, file=sys.stderr)

    # Print results
    for hexdigest, path in results:
        if args.raw:
            print(hexdigest)
        else:
            print(format_line(hexdigest, path, args.binary))

    if errors:
        sys.exit(1)
|
|
208
|
+
|
|
209
|
+
|
|
210
|
+
if __name__ == "__main__":
|
|
211
|
+
main()
|