dupegun 1.0.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- dupegun-1.0.0/LICENSE +21 -0
- dupegun-1.0.0/PKG-INFO +102 -0
- dupegun-1.0.0/README.md +65 -0
- dupegun-1.0.0/dupegun/__init__.py +0 -0
- dupegun-1.0.0/dupegun/actions.py +103 -0
- dupegun-1.0.0/dupegun/cli.py +145 -0
- dupegun-1.0.0/dupegun/reporter.py +75 -0
- dupegun-1.0.0/dupegun/scanner.py +72 -0
- dupegun-1.0.0/dupegun.egg-info/PKG-INFO +102 -0
- dupegun-1.0.0/dupegun.egg-info/SOURCES.txt +15 -0
- dupegun-1.0.0/dupegun.egg-info/dependency_links.txt +1 -0
- dupegun-1.0.0/dupegun.egg-info/entry_points.txt +2 -0
- dupegun-1.0.0/dupegun.egg-info/requires.txt +2 -0
- dupegun-1.0.0/dupegun.egg-info/top_level.txt +1 -0
- dupegun-1.0.0/pyproject.toml +25 -0
- dupegun-1.0.0/setup.cfg +4 -0
- dupegun-1.0.0/tests/test_scanner.py +25 -0
dupegun-1.0.0/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2025 Prasanna B
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
dupegun-1.0.0/PKG-INFO
ADDED
|
@@ -0,0 +1,102 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: dupegun
|
|
3
|
+
Version: 1.0.0
|
|
4
|
+
Summary: Cross-platform duplicate file finder and cleaner
|
|
5
|
+
License: MIT License
|
|
6
|
+
|
|
7
|
+
Copyright (c) 2025 Prasanna B
|
|
8
|
+
|
|
9
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
10
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
11
|
+
in the Software without restriction, including without limitation the rights
|
|
12
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
13
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
14
|
+
furnished to do so, subject to the following conditions:
|
|
15
|
+
|
|
16
|
+
The above copyright notice and this permission notice shall be included in all
|
|
17
|
+
copies or substantial portions of the Software.
|
|
18
|
+
|
|
19
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
20
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
21
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
22
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
23
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
24
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
25
|
+
SOFTWARE.
|
|
26
|
+
Keywords: duplicate,files,cli,disk,cleaner
|
|
27
|
+
Classifier: Programming Language :: Python :: 3
|
|
28
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
29
|
+
Classifier: Operating System :: OS Independent
|
|
30
|
+
Classifier: Environment :: Console
|
|
31
|
+
Requires-Python: >=3.9
|
|
32
|
+
Description-Content-Type: text/markdown
|
|
33
|
+
License-File: LICENSE
|
|
34
|
+
Requires-Dist: rich>=13.0
|
|
35
|
+
Requires-Dist: click>=8.0
|
|
36
|
+
Dynamic: license-file
|
|
37
|
+
|
|
38
|
+
# dupegun
|
|
39
|
+
|
|
40
|
+
Fast cross-platform duplicate file finder and cleaner for Windows, Linux and macOS.
|
|
41
|
+
|
|
42
|
+
## Install
|
|
43
|
+
|
|
44
|
+
```bash
|
|
45
|
+
pip install dupegun
|
|
46
|
+
```
|
|
47
|
+
|
|
48
|
+
## Commands
|
|
49
|
+
|
|
50
|
+
```bash
|
|
51
|
+
# Find duplicates
|
|
52
|
+
dupegun scan ~/Downloads
|
|
53
|
+
|
|
54
|
+
# Skip files under 1 MB
|
|
55
|
+
dupegun scan ~/Downloads --min-size 1000000
|
|
56
|
+
|
|
57
|
+
# Scan multiple folders
|
|
58
|
+
dupegun scan ~/Downloads ~/Documents ~/Desktop
|
|
59
|
+
|
|
60
|
+
# Export to JSON
|
|
61
|
+
dupegun scan ~/Downloads --json results.json
|
|
62
|
+
|
|
63
|
+
# Export to CSV
|
|
64
|
+
dupegun scan ~/Downloads --csv results.csv
|
|
65
|
+
|
|
66
|
+
# Preview what would be deleted (safe)
|
|
67
|
+
dupegun delete ~/Downloads --strategy newest
|
|
68
|
+
|
|
69
|
+
# Actually delete
|
|
70
|
+
dupegun delete ~/Downloads --strategy newest --no-dry-run
|
|
71
|
+
|
|
72
|
+
# Confirm each group before deleting
|
|
73
|
+
dupegun delete ~/Downloads --no-dry-run --interactive
|
|
74
|
+
|
|
75
|
+
# Move duplicates to quarantine
|
|
76
|
+
dupegun move ~/Downloads --dest ~/quarantine --no-dry-run
|
|
77
|
+
|
|
78
|
+
# Replace duplicates with hard links
|
|
79
|
+
dupegun hardlink ~/Downloads --no-dry-run
|
|
80
|
+
```
|
|
81
|
+
|
|
82
|
+
## Strategies
|
|
83
|
+
|
|
84
|
+
| Flag | Keeps |
|
|
85
|
+
|---|---|
|
|
86
|
+
| `--strategy shortest` | Shortest file path (default) |
|
|
87
|
+
| `--strategy newest` | Most recently modified copy |
|
|
88
|
+
| `--strategy oldest` | Oldest copy |
|
|
89
|
+
|
|
90
|
+
## Features
|
|
91
|
+
|
|
92
|
+
- Works on Windows, Linux, macOS
|
|
93
|
+
- All file types supported
|
|
94
|
+
- 3-pass engine (size → partial hash → full SHA-256)
|
|
95
|
+
- Colored terminal output
|
|
96
|
+
- Dry-run on by default (safe)
|
|
97
|
+
- JSON and CSV export
|
|
98
|
+
- Hard link support
|
|
99
|
+
|
|
100
|
+
## License
|
|
101
|
+
|
|
102
|
+
MIT
|
dupegun-1.0.0/README.md
ADDED
|
@@ -0,0 +1,65 @@
|
|
|
1
|
+
# dupegun
|
|
2
|
+
|
|
3
|
+
Fast cross-platform duplicate file finder and cleaner for Windows, Linux and macOS.
|
|
4
|
+
|
|
5
|
+
## Install
|
|
6
|
+
|
|
7
|
+
```bash
|
|
8
|
+
pip install dupegun
|
|
9
|
+
```
|
|
10
|
+
|
|
11
|
+
## Commands
|
|
12
|
+
|
|
13
|
+
```bash
|
|
14
|
+
# Find duplicates
|
|
15
|
+
dupegun scan ~/Downloads
|
|
16
|
+
|
|
17
|
+
# Skip files under 1 MB
|
|
18
|
+
dupegun scan ~/Downloads --min-size 1000000
|
|
19
|
+
|
|
20
|
+
# Scan multiple folders
|
|
21
|
+
dupegun scan ~/Downloads ~/Documents ~/Desktop
|
|
22
|
+
|
|
23
|
+
# Export to JSON
|
|
24
|
+
dupegun scan ~/Downloads --json results.json
|
|
25
|
+
|
|
26
|
+
# Export to CSV
|
|
27
|
+
dupegun scan ~/Downloads --csv results.csv
|
|
28
|
+
|
|
29
|
+
# Preview what would be deleted (safe)
|
|
30
|
+
dupegun delete ~/Downloads --strategy newest
|
|
31
|
+
|
|
32
|
+
# Actually delete
|
|
33
|
+
dupegun delete ~/Downloads --strategy newest --no-dry-run
|
|
34
|
+
|
|
35
|
+
# Confirm each group before deleting
|
|
36
|
+
dupegun delete ~/Downloads --no-dry-run --interactive
|
|
37
|
+
|
|
38
|
+
# Move duplicates to quarantine
|
|
39
|
+
dupegun move ~/Downloads --dest ~/quarantine --no-dry-run
|
|
40
|
+
|
|
41
|
+
# Replace duplicates with hard links
|
|
42
|
+
dupegun hardlink ~/Downloads --no-dry-run
|
|
43
|
+
```
|
|
44
|
+
|
|
45
|
+
## Strategies
|
|
46
|
+
|
|
47
|
+
| Flag | Keeps |
|
|
48
|
+
|---|---|
|
|
49
|
+
| `--strategy shortest` | Shortest file path (default) |
|
|
50
|
+
| `--strategy newest` | Most recently modified copy |
|
|
51
|
+
| `--strategy oldest` | Oldest copy |
|
|
52
|
+
|
|
53
|
+
## Features
|
|
54
|
+
|
|
55
|
+
- Works on Windows, Linux, macOS
|
|
56
|
+
- All file types supported
|
|
57
|
+
- 3-pass engine (size → partial hash → full SHA-256)
|
|
58
|
+
- Colored terminal output
|
|
59
|
+
- Dry-run on by default (safe)
|
|
60
|
+
- JSON and CSV export
|
|
61
|
+
- Hard link support
|
|
62
|
+
|
|
63
|
+
## License
|
|
64
|
+
|
|
65
|
+
MIT
|
|
File without changes
|
|
@@ -0,0 +1,103 @@
|
|
|
1
|
+
import os
|
|
2
|
+
import shutil
|
|
3
|
+
from pathlib import Path
|
|
4
|
+
from rich.console import Console
|
|
5
|
+
from rich.prompt import Confirm
|
|
6
|
+
from .reporter import human_size
|
|
7
|
+
|
|
8
|
+
console = Console()
|
|
9
|
+
|
|
10
|
+
def pick_keeper(paths: list, strategy: str) -> Path:
    """Choose which copy of a duplicate group should be kept.

    Args:
        paths: Non-empty list of ``Path`` objects with identical content.
        strategy: ``"newest"`` keeps the most recently modified copy,
            ``"oldest"`` the least recently modified one, ``"shortest"``
            the one with the shortest path string. Any other value keeps
            the first path in the list.

    Returns:
        The selected path; ties are resolved in favour of the earliest
        entry in ``paths``.

    Raises:
        ValueError: If ``paths`` is empty.
    """
    if not paths:
        raise ValueError("pick_keeper() requires at least one path")

    def mtime_or(fallback: float):
        # A file may disappear between the scan and the action. Instead of
        # aborting the whole run, rank such a file with a fallback value
        # that can never win the comparison.
        def key(p):
            try:
                return p.stat().st_mtime
            except OSError:
                return fallback
        return key

    if strategy == "newest":
        return max(paths, key=mtime_or(float("-inf")))
    if strategy == "oldest":
        return min(paths, key=mtime_or(float("inf")))
    if strategy == "shortest":
        return min(paths, key=lambda p: len(str(p)))
    return paths[0]
|
|
18
|
+
|
|
19
|
+
def delete_dupes(
    groups: dict,
    strategy: str = "shortest",
    dry_run: bool = True,
    interactive: bool = False,
) -> None:
    """Delete every duplicate in each group except the copy chosen to keep.

    Args:
        groups: Mapping of content hash to the list of paths sharing it.
        strategy: Keeper-selection strategy forwarded to ``pick_keeper``.
        dry_run: When true, only print what would be deleted.
        interactive: When true, ask for confirmation before each group.
    """
    # Local import keeps this block self-contained. Paths can contain
    # "[...]" sequences that Rich would otherwise interpret as markup tags
    # (dropping text or raising on an unmatched closing tag).
    from rich.markup import escape

    total_freed = 0

    for paths in groups.values():
        keeper = pick_keeper(paths, strategy)
        to_delete = [p for p in paths if p != keeper]

        console.print(f"\n[bold]Keep:[/bold] [green]{escape(str(keeper))}[/green]")
        for p in to_delete:
            console.print(f"[bold]Delete:[/bold] [red]{escape(str(p))}[/red]")

        # Never remove the other copies if the file we intend to keep has
        # disappeared since the scan - that would destroy the last copy.
        if not keeper.exists():
            console.print(" [yellow]Skipped: keeper no longer exists[/yellow]")
            continue

        if interactive and not Confirm.ask(" Proceed with this group?"):
            continue

        for p in to_delete:
            if dry_run:
                console.print(
                    f" [dim][DRY RUN] would delete {escape(str(p))}[/dim]"
                )
                continue
            try:
                # stat() lives inside the try: a vanished file is reported
                # as an error instead of aborting the remaining deletions.
                size = p.stat().st_size
                p.unlink()
            except OSError as e:
                console.print(f" [yellow]Error: {escape(str(e))}[/yellow]")
            else:
                total_freed += size
                console.print(f" [red]Deleted {escape(str(p))}[/red]")

    if not dry_run:
        console.print(
            f"\n[bold green]Freed {human_size(total_freed)}[/bold green]"
        )
|
|
55
|
+
|
|
56
|
+
def move_dupes(
    groups: dict,
    dest: Path,
    strategy: str = "shortest",
    dry_run: bool = True,
) -> None:
    """Move every duplicate except the keeper of each group into ``dest``.

    Args:
        groups: Mapping of content hash to the list of paths sharing it.
        dest: Quarantine directory; created only when files are really moved.
        strategy: Keeper-selection strategy forwarded to ``pick_keeper``.
        dry_run: When true, only print the planned moves.
    """
    # Local import keeps this block self-contained; escaping stops Rich
    # from parsing "[...]" inside paths as markup.
    from rich.markup import escape

    # A dry run must leave the filesystem untouched, so the destination is
    # only created when files are actually going to be moved.
    if not dry_run:
        dest.mkdir(parents=True, exist_ok=True)

    # Targets already used or planned during this run. Tracking them keeps
    # the dry-run preview accurate and prevents two moves from landing on
    # the same name.
    claimed = set()

    def pick_target(hash_val: str, name: str) -> Path:
        """Return a destination path that does not clash with anything."""
        target = dest / name
        if target.exists() or target in claimed:
            target = dest / f"{hash_val[:8]}_{name}"
        counter = 1
        while target.exists() or target in claimed:
            target = dest / f"{hash_val[:8]}_{counter}_{name}"
            counter += 1
        claimed.add(target)
        return target

    for hash_val, paths in groups.items():
        keeper = pick_keeper(paths, strategy)
        for p in paths:
            if p == keeper:
                continue
            target = pick_target(hash_val, p.name)

            if dry_run:
                console.print(
                    f"[dim][DRY RUN] would move {escape(str(p))} → "
                    f"{escape(str(target))}[/dim]"
                )
                continue

            try:
                shutil.move(str(p), str(target))
            except OSError as e:
                # shutil.Error derives from OSError; report and keep going.
                console.print(f"[yellow]Error: {escape(str(e))}[/yellow]")
            else:
                console.print(
                    f"[yellow]Moved {escape(str(p))} → "
                    f"{escape(str(target))}[/yellow]"
                )
|
|
80
|
+
|
|
81
|
+
def hardlink_dupes(
    groups: dict,
    strategy: str = "shortest",
    dry_run: bool = True,
) -> None:
    """Replace each duplicate with a hard link to the group's keeper.

    Args:
        groups: Mapping of content hash to the list of paths sharing it.
        strategy: Keeper-selection strategy forwarded to ``pick_keeper``.
        dry_run: When true, only print what would be linked.
    """
    # Local import keeps this block self-contained; escaping stops Rich
    # from parsing "[...]" inside paths as markup.
    from rich.markup import escape

    for paths in groups.values():
        keeper = pick_keeper(paths, strategy)
        for p in paths:
            if p == keeper:
                continue
            if dry_run:
                console.print(
                    f"[dim][DRY RUN] would hardlink {escape(str(p))} → "
                    f"{escape(str(keeper))}[/dim]"
                )
                continue

            # Create the link under a temporary name first and then swap it
            # into place. Unlinking first would lose the duplicate whenever
            # os.link() fails (different filesystem, no hard-link support).
            tmp = p.with_name(f"{p.name}.dupegun-{os.getpid()}.tmp")
            try:
                if keeper.samefile(p):
                    # Already the same inode: nothing to gain by relinking.
                    continue
                os.link(keeper, tmp)
                try:
                    os.replace(tmp, p)
                except OSError:
                    tmp.unlink(missing_ok=True)
                    raise
            except OSError as e:
                console.print(f"[yellow]Error: {escape(str(e))}[/yellow]")
            else:
                console.print(
                    f"[cyan]Hardlinked {escape(str(p))} → "
                    f"{escape(str(keeper))}[/cyan]"
                )
|
|
@@ -0,0 +1,145 @@
|
|
|
1
|
+
import click
|
|
2
|
+
from pathlib import Path
|
|
3
|
+
from rich.console import Console
|
|
4
|
+
from rich.progress import (
|
|
5
|
+
Progress, SpinnerColumn,
|
|
6
|
+
TextColumn, BarColumn, TaskProgressColumn
|
|
7
|
+
)
|
|
8
|
+
|
|
9
|
+
from .scanner import find_duplicates
|
|
10
|
+
from .reporter import print_table, export_json, export_csv
|
|
11
|
+
from .actions import delete_dupes, move_dupes, hardlink_dupes
|
|
12
|
+
|
|
13
|
+
console = Console()
|
|
14
|
+
|
|
15
|
+
def _scan(paths, min_size):
    """Run the duplicate scan over ``paths`` while showing a progress bar.

    Args:
        paths: Iterable of path strings supplied on the command line.
        min_size: Minimum file size in bytes, forwarded to the scanner.

    Returns:
        Whatever ``find_duplicates`` returns for the given roots.
    """
    # Local import keeps this block self-contained. File names may contain
    # "[...]" which Rich would otherwise parse as markup in the description.
    from rich.markup import escape

    roots = [Path(p) for p in paths]
    with Progress(
        SpinnerColumn(),
        TextColumn("[progress.description]{task.description}"),
        BarColumn(),
        TaskProgressColumn(),
        transient=True,
    ) as progress:
        # total=None until the scanner reports how many files it will hash.
        task = progress.add_task("Scanning...", total=None)

        def cb(done, total, path):
            progress.update(
                task, completed=done, total=total,
                description=f"Hashing [cyan]{escape(path.name)}[/cyan]"
            )

        groups = find_duplicates(roots, min_size=min_size, progress_cb=cb)
    return groups
|
|
32
|
+
|
|
33
|
+
@click.group()
@click.version_option("1.0.0", prog_name="dupegun")
def main():
    """dupegun — find and destroy duplicate files.

    Works on Windows, Linux and macOS. All file types supported.
    """
    # Intentionally empty: this function only anchors the command group;
    # the subcommands attached with @main.command() do the actual work.
|
|
41
|
+
|
|
42
|
+
@main.command()
@click.argument("paths", nargs=-1, required=True,
                type=click.Path(exists=True))
@click.option("--min-size", default=1,
              help="Minimum file size in bytes to scan (default: 1)")
@click.option("--json", "out_json", default=None,
              help="Export results to a JSON file")
@click.option("--csv", "out_csv", default=None,
              help="Export results to a CSV file")
def scan(paths, min_size, out_json, out_csv):
    """Scan folders and list all duplicate files."""
    console.print(f"\n[bold]dupegun[/bold] — scanning {len(paths)} path(s)...\n")
    groups = _scan(paths, min_size)

    if groups:
        print_table(groups)
    else:
        console.print("[bold green]No duplicates found![/bold green]")

    # Requested exports are written even when nothing was found, so that
    # scripted callers always get the file they asked for (an empty list /
    # a header-only CSV) instead of a silently missing one.
    if out_json:
        export_json(groups, out_json)
    if out_csv:
        export_csv(groups, out_csv)
|
|
66
|
+
|
|
67
|
+
@main.command()
@click.argument("paths", nargs=-1, required=True,
                type=click.Path(exists=True))
@click.option("--strategy", default="shortest",
              type=click.Choice(["shortest", "newest", "oldest"]),
              help="Which copy to keep (default: shortest path)")
@click.option("--dry-run/--no-dry-run", default=True,
              help="Preview without deleting (default: ON)")
@click.option("--interactive", is_flag=True,
              help="Confirm each group before deleting")
@click.option("--min-size", default=1,
              help="Minimum file size in bytes to scan (default: 1)")
def delete(paths, strategy, dry_run, interactive, min_size):
    """Delete duplicates, keeping one copy per group."""
    groups = _scan(paths, min_size)

    if not groups:
        console.print("[bold green]No duplicates found![/bold green]")
        return

    # Make the default, non-destructive mode explicit before listing files.
    if dry_run:
        console.print(
            "[yellow]DRY RUN — nothing will be deleted. "
            "Use --no-dry-run to actually delete.[/yellow]\n"
        )

    delete_dupes(
        groups,
        strategy=strategy,
        dry_run=dry_run,
        interactive=interactive,
    )
|
|
98
|
+
|
|
99
|
+
@main.command()
@click.argument("paths", nargs=-1, required=True,
                type=click.Path(exists=True))
@click.option("--dest", required=True,
              type=click.Path(file_okay=False),
              help="Destination folder to move duplicates into")
@click.option("--strategy", default="shortest",
              type=click.Choice(["shortest", "newest", "oldest"]),
              help="Which copy to keep (default: shortest path)")
@click.option("--dry-run/--no-dry-run", default=True,
              help="Preview without moving (default: ON)")
@click.option("--min-size", default=1,
              help="Minimum file size in bytes to scan (default: 1)")
def move(paths, dest, strategy, dry_run, min_size):
    """Move duplicates to a quarantine folder instead of deleting."""
    groups = _scan(paths, min_size)

    if not groups:
        console.print("[bold green]No duplicates found![/bold green]")
        return

    # Make the default, non-destructive mode explicit before listing files.
    if dry_run:
        console.print(
            "[yellow]DRY RUN — nothing will be moved. "
            "Use --no-dry-run to actually move.[/yellow]\n"
        )

    move_dupes(groups, Path(dest), strategy=strategy, dry_run=dry_run)
|
|
123
|
+
|
|
124
|
+
@main.command()
@click.argument("paths", nargs=-1, required=True,
                type=click.Path(exists=True))
@click.option("--strategy", default="shortest",
              type=click.Choice(["shortest", "newest", "oldest"]),
              help="Which copy to keep (default: shortest path)")
@click.option("--dry-run/--no-dry-run", default=True,
              help="Preview without changing files (default: ON)")
@click.option("--min-size", default=1,
              help="Minimum file size in bytes to scan (default: 1)")
def hardlink(paths, strategy, dry_run, min_size):
    """Replace duplicates with hard links to save space."""
    groups = _scan(paths, min_size)

    if not groups:
        console.print("[bold green]No duplicates found![/bold green]")
        return

    # Make the default, non-destructive mode explicit before listing files.
    if dry_run:
        console.print(
            "[yellow]DRY RUN — nothing will be changed. "
            "Use --no-dry-run to actually hardlink.[/yellow]\n"
        )

    hardlink_dupes(groups, strategy=strategy, dry_run=dry_run)
|
|
@@ -0,0 +1,75 @@
|
|
|
1
|
+
import json
|
|
2
|
+
import csv
|
|
3
|
+
import datetime
|
|
4
|
+
from pathlib import Path
|
|
5
|
+
from rich.table import Table
|
|
6
|
+
from rich.console import Console
|
|
7
|
+
|
|
8
|
+
console = Console()
|
|
9
|
+
|
|
10
|
+
def human_size(n: int) -> str:
    """Format a byte count as a short string using 1024-based units.

    Args:
        n: Size in bytes.

    Returns:
        The size with one decimal place and a unit from B up to PB,
        e.g. ``"1.5 KB"``.
    """
    size = float(n)
    for unit in ("B", "KB", "MB", "GB", "TB"):
        # Compare the value as it will be displayed: 1023.99 KB would
        # otherwise be printed as "1024.0 KB" instead of "1.0 MB".
        if round(abs(size), 1) < 1024:
            return f"{size:.1f} {unit}"
        size /= 1024
    return f"{size:.1f} PB"
|
|
16
|
+
|
|
17
|
+
def print_table(groups: dict) -> None:
    """Print one table per duplicate group followed by a summary line.

    Args:
        groups: Mapping of content hash to the list of paths sharing it.
    """
    # Local import keeps this block self-contained; table cells are parsed
    # as Rich markup, so "[...]" inside a path must be escaped.
    from rich.markup import escape

    def stat_or_none(p):
        # A file can vanish between the scan and the report; show a
        # placeholder row instead of aborting the whole listing.
        try:
            return p.stat()
        except OSError:
            return None

    total_wasted = 0

    for i, paths in enumerate(groups.values(), 1):
        stats = [stat_or_none(p) for p in paths]
        # The group size is taken from the first member that can still be
        # stat-ed (0 if none can).
        size = next((s.st_size for s in stats if s is not None), 0)
        wasted = size * (len(paths) - 1)
        total_wasted += wasted

        t = Table(
            title=f"[bold]Group {i}[/bold] — {human_size(size)} each | "
                  f"[red]{human_size(wasted)} wasted[/red]",
            show_lines=True,
        )
        t.add_column("#", style="dim", width=4)
        t.add_column("Path", style="cyan")
        t.add_column("Modified", style="yellow")
        t.add_column("Size", justify="right")

        for j, (p, st) in enumerate(zip(paths, stats), 1):
            if st is None:
                t.add_row(str(j), escape(str(p)), "?", "?")
                continue
            mtime = datetime.datetime.fromtimestamp(
                st.st_mtime
            ).strftime("%Y-%m-%d %H:%M")
            t.add_row(str(j), escape(str(p)), mtime, human_size(st.st_size))

        console.print(t)

    console.print(
        f"\n[bold green]Total reclaimable:[/bold green] "
        f"[green]{human_size(total_wasted)}[/green] "
        f"across [bold]{len(groups)}[/bold] duplicate group(s)\n"
    )
|
|
49
|
+
|
|
50
|
+
def export_json(groups: dict, out_path: str) -> None:
    """Write the duplicate groups to ``out_path`` as an indented JSON list.

    Each entry records the content hash, the number of copies, the size of
    the first copy and every path in the group as a string.
    """
    records = []
    for digest, members in groups.items():
        records.append(
            {
                "hash": digest,
                "count": len(members),
                "size_each": members[0].stat().st_size,
                "files": list(map(str, members)),
            }
        )

    with open(out_path, "w") as handle:
        json.dump(records, handle, indent=2)

    console.print(f"[green]Exported JSON → {out_path}[/green]")
|
|
63
|
+
|
|
64
|
+
def export_csv(groups: dict, out_path: str) -> None:
    """Write one CSV row per duplicate file to ``out_path``.

    Columns: group number (1-based), content hash, path, size in bytes and
    modification time formatted as ``YYYY-MM-DD HH:MM``.
    """
    # Explicit UTF-8: with the platform default encoding (e.g. cp1252 on
    # Windows) a path containing characters outside that code page raises
    # UnicodeEncodeError halfway through the export.
    with open(out_path, "w", newline="", encoding="utf-8") as f:
        w = csv.writer(f)
        w.writerow(["group", "hash", "path", "size_bytes", "modified"])
        for i, (h, paths) in enumerate(groups.items(), 1):
            for p in paths:
                stat = p.stat()
                mtime = datetime.datetime.fromtimestamp(
                    stat.st_mtime
                ).strftime("%Y-%m-%d %H:%M")
                w.writerow([i, h, str(p), stat.st_size, mtime])
    console.print(f"[green]Exported CSV → {out_path}[/green]")
|
|
@@ -0,0 +1,72 @@
|
|
|
1
|
+
import os
|
|
2
|
+
import hashlib
|
|
3
|
+
from pathlib import Path
|
|
4
|
+
from collections import defaultdict
|
|
5
|
+
from typing import Iterator
|
|
6
|
+
|
|
7
|
+
CHUNK = 65_536
|
|
8
|
+
|
|
9
|
+
def _hash_file(path: Path) -> str:
    """Return the SHA-256 hex digest of the whole file at ``path``.

    The file is read in ``CHUNK``-sized pieces so large files are never
    loaded into memory at once.
    """
    digest = hashlib.sha256()
    with open(path, "rb") as stream:
        # iter() with a sentinel stops at the first empty read (end of file).
        for block in iter(lambda: stream.read(CHUNK), b""):
            digest.update(block)
    return digest.hexdigest()
|
|
15
|
+
|
|
16
|
+
def _partial_hash(path: Path, size: int = 4096) -> str:
|
|
17
|
+
h = hashlib.sha256()
|
|
18
|
+
with open(path, "rb") as f:
|
|
19
|
+
h.update(f.read(size))
|
|
20
|
+
return h.hexdigest()
|
|
21
|
+
|
|
22
|
+
def walk_files(root: Path, min_size: int = 1) -> Iterator[Path]:
    """Yield every regular file below ``root`` of at least ``min_size`` bytes.

    Symbolic links and other non-regular entries (FIFOs, sockets, devices)
    are skipped. A symlink has the same content as its target, so reporting
    it as a duplicate could lead to the real file being deleted while only
    the dangling link is kept.

    Args:
        root: Directory to walk recursively.
        min_size: Smallest file size in bytes that is yielded.

    Yields:
        Paths of matching files; entries that cannot be stat-ed are skipped.
    """
    from stat import S_ISREG  # local import keeps this block self-contained

    for dirpath, _, filenames in os.walk(root):
        for name in filenames:
            p = Path(dirpath) / name
            try:
                # lstat() does not follow symlinks, so links are seen as
                # links and filtered out by the S_ISREG check below.
                info = p.lstat()
            except OSError:  # PermissionError is a subclass of OSError
                continue
            if S_ISREG(info.st_mode) and info.st_size >= min_size:
                yield p
|
|
31
|
+
|
|
32
|
+
def find_duplicates(
    roots: list,
    min_size: int = 1,
    progress_cb=None
) -> dict:
    """Find groups of files with identical content below ``roots``.

    Candidates are narrowed in three passes: by size, by a hash of the
    first bytes, and finally by a full SHA-256 of the content.

    Args:
        roots: Directories to scan.
        min_size: Minimum file size in bytes, forwarded to ``walk_files``.
        progress_cb: Optional callable ``(done, total, path)`` invoked
            before each file of the full-hash pass.

    Returns:
        Mapping of full content hash to the list of paths (two or more)
        sharing that hash.
    """
    # Pass 1: group by size. The same file can be reached more than once
    # when roots overlap or are spelled differently (relative vs absolute).
    # It must be listed only once, otherwise it is reported as a duplicate
    # of itself and a later delete would remove the only copy.
    seen = set()
    by_size = defaultdict(list)
    for root in roots:
        for path in walk_files(root, min_size):
            try:
                real = os.path.realpath(path)
                size = path.stat().st_size
            except OSError:
                # Vanished or unreadable since the walk: ignore it.
                continue
            if real in seen:
                continue
            seen.add(real)
            by_size[size].append(path)

    # Pass 2: within each size bucket, group by a hash of the leading
    # bytes. Keying on (size, hash) keeps files of different sizes apart so
    # they never reach the expensive full-hash pass together.
    by_partial = defaultdict(list)
    for size, files in by_size.items():
        if len(files) < 2:
            continue
        for path in files:
            try:
                by_partial[(size, _partial_hash(path))].append(path)
            except OSError:
                continue

    partial_candidates = [
        p for files in by_partial.values()
        if len(files) > 1
        for p in files
    ]

    # Pass 3: full content hash of the remaining candidates.
    by_hash = defaultdict(list)
    total = len(partial_candidates)
    for i, path in enumerate(partial_candidates):
        if progress_cb:
            progress_cb(i + 1, total, path)
        try:
            by_hash[_hash_file(path)].append(path)
        except OSError:
            continue

    return {h: paths for h, paths in by_hash.items() if len(paths) > 1}
|
|
@@ -0,0 +1,102 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: dupegun
|
|
3
|
+
Version: 1.0.0
|
|
4
|
+
Summary: Cross-platform duplicate file finder and cleaner
|
|
5
|
+
License: MIT License
|
|
6
|
+
|
|
7
|
+
Copyright (c) 2025 Prasanna B
|
|
8
|
+
|
|
9
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
10
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
11
|
+
in the Software without restriction, including without limitation the rights
|
|
12
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
13
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
14
|
+
furnished to do so, subject to the following conditions:
|
|
15
|
+
|
|
16
|
+
The above copyright notice and this permission notice shall be included in all
|
|
17
|
+
copies or substantial portions of the Software.
|
|
18
|
+
|
|
19
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
20
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
21
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
22
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
23
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
24
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
25
|
+
SOFTWARE.
|
|
26
|
+
Keywords: duplicate,files,cli,disk,cleaner
|
|
27
|
+
Classifier: Programming Language :: Python :: 3
|
|
28
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
29
|
+
Classifier: Operating System :: OS Independent
|
|
30
|
+
Classifier: Environment :: Console
|
|
31
|
+
Requires-Python: >=3.9
|
|
32
|
+
Description-Content-Type: text/markdown
|
|
33
|
+
License-File: LICENSE
|
|
34
|
+
Requires-Dist: rich>=13.0
|
|
35
|
+
Requires-Dist: click>=8.0
|
|
36
|
+
Dynamic: license-file
|
|
37
|
+
|
|
38
|
+
# dupegun
|
|
39
|
+
|
|
40
|
+
Fast cross-platform duplicate file finder and cleaner for Windows, Linux and macOS.
|
|
41
|
+
|
|
42
|
+
## Install
|
|
43
|
+
|
|
44
|
+
```bash
|
|
45
|
+
pip install dupegun
|
|
46
|
+
```
|
|
47
|
+
|
|
48
|
+
## Commands
|
|
49
|
+
|
|
50
|
+
```bash
|
|
51
|
+
# Find duplicates
|
|
52
|
+
dupegun scan ~/Downloads
|
|
53
|
+
|
|
54
|
+
# Skip files under 1 MB
|
|
55
|
+
dupegun scan ~/Downloads --min-size 1000000
|
|
56
|
+
|
|
57
|
+
# Scan multiple folders
|
|
58
|
+
dupegun scan ~/Downloads ~/Documents ~/Desktop
|
|
59
|
+
|
|
60
|
+
# Export to JSON
|
|
61
|
+
dupegun scan ~/Downloads --json results.json
|
|
62
|
+
|
|
63
|
+
# Export to CSV
|
|
64
|
+
dupegun scan ~/Downloads --csv results.csv
|
|
65
|
+
|
|
66
|
+
# Preview what would be deleted (safe)
|
|
67
|
+
dupegun delete ~/Downloads --strategy newest
|
|
68
|
+
|
|
69
|
+
# Actually delete
|
|
70
|
+
dupegun delete ~/Downloads --strategy newest --no-dry-run
|
|
71
|
+
|
|
72
|
+
# Confirm each group before deleting
|
|
73
|
+
dupegun delete ~/Downloads --no-dry-run --interactive
|
|
74
|
+
|
|
75
|
+
# Move duplicates to quarantine
|
|
76
|
+
dupegun move ~/Downloads --dest ~/quarantine --no-dry-run
|
|
77
|
+
|
|
78
|
+
# Replace duplicates with hard links
|
|
79
|
+
dupegun hardlink ~/Downloads --no-dry-run
|
|
80
|
+
```
|
|
81
|
+
|
|
82
|
+
## Strategies
|
|
83
|
+
|
|
84
|
+
| Flag | Keeps |
|
|
85
|
+
|---|---|
|
|
86
|
+
| `--strategy shortest` | Shortest file path (default) |
|
|
87
|
+
| `--strategy newest` | Most recently modified copy |
|
|
88
|
+
| `--strategy oldest` | Oldest copy |
|
|
89
|
+
|
|
90
|
+
## Features
|
|
91
|
+
|
|
92
|
+
- Works on Windows, Linux, macOS
|
|
93
|
+
- All file types supported
|
|
94
|
+
- 3-pass engine (size → partial hash → full SHA-256)
|
|
95
|
+
- Colored terminal output
|
|
96
|
+
- Dry-run on by default (safe)
|
|
97
|
+
- JSON and CSV export
|
|
98
|
+
- Hard link support
|
|
99
|
+
|
|
100
|
+
## License
|
|
101
|
+
|
|
102
|
+
MIT
|
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
LICENSE
|
|
2
|
+
README.md
|
|
3
|
+
pyproject.toml
|
|
4
|
+
dupegun/__init__.py
|
|
5
|
+
dupegun/actions.py
|
|
6
|
+
dupegun/cli.py
|
|
7
|
+
dupegun/reporter.py
|
|
8
|
+
dupegun/scanner.py
|
|
9
|
+
dupegun.egg-info/PKG-INFO
|
|
10
|
+
dupegun.egg-info/SOURCES.txt
|
|
11
|
+
dupegun.egg-info/dependency_links.txt
|
|
12
|
+
dupegun.egg-info/entry_points.txt
|
|
13
|
+
dupegun.egg-info/requires.txt
|
|
14
|
+
dupegun.egg-info/top_level.txt
|
|
15
|
+
tests/test_scanner.py
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
dupegun
|
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
[build-system]
|
|
2
|
+
requires = ["setuptools>=68", "wheel"]
|
|
3
|
+
build-backend = "setuptools.build_meta"
|
|
4
|
+
|
|
5
|
+
[project]
|
|
6
|
+
name = "dupegun"
|
|
7
|
+
version = "1.0.0"
|
|
8
|
+
description = "Cross-platform duplicate file finder and cleaner"
|
|
9
|
+
readme = "README.md"
|
|
10
|
+
license = {file = "LICENSE"}
|
|
11
|
+
requires-python = ">=3.9"
|
|
12
|
+
keywords = ["duplicate", "files", "cli", "disk", "cleaner"]
|
|
13
|
+
classifiers = [
|
|
14
|
+
"Programming Language :: Python :: 3",
|
|
15
|
+
"License :: OSI Approved :: MIT License",
|
|
16
|
+
"Operating System :: OS Independent",
|
|
17
|
+
"Environment :: Console",
|
|
18
|
+
]
|
|
19
|
+
dependencies = [
|
|
20
|
+
"rich>=13.0",
|
|
21
|
+
"click>=8.0",
|
|
22
|
+
]
|
|
23
|
+
|
|
24
|
+
[project.scripts]
|
|
25
|
+
dupegun = "dupegun.cli:main"
|
dupegun-1.0.0/setup.cfg
ADDED
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
import os
|
|
2
|
+
import tempfile
|
|
3
|
+
from pathlib import Path
|
|
4
|
+
from dupegun.scanner import find_duplicates
|
|
5
|
+
|
|
6
|
+
def test_finds_duplicates():
    """Two identical files and one unique file yield exactly one group of two."""
    contents = {
        "a.txt": "hello duplicate",
        "b.txt": "hello duplicate",
        "c.txt": "unique content",
    }
    with tempfile.TemporaryDirectory() as tmp:
        root = Path(tmp)
        for name, text in contents.items():
            (root / name).write_text(text)

        found = find_duplicates([root])
        assert len(found) == 1
        members = next(iter(found.values()))
        assert len(members) == 2
|
|
19
|
+
|
|
20
|
+
def test_no_duplicates():
    """Files with different content produce no duplicate groups."""
    with tempfile.TemporaryDirectory() as tmp:
        root = Path(tmp)
        for name, text in (("x.txt", "aaa"), ("y.txt", "bbb")):
            (root / name).write_text(text)
        found = find_duplicates([root])
        assert len(found) == 0
|