dupegun 1.0.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
dupegun-1.0.0/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2025 Prasanna B
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
dupegun-1.0.0/PKG-INFO ADDED
@@ -0,0 +1,102 @@
1
+ Metadata-Version: 2.4
2
+ Name: dupegun
3
+ Version: 1.0.0
4
+ Summary: Cross-platform duplicate file finder and cleaner
5
+ License: MIT License
6
+
7
+ Copyright (c) 2025 Prasanna B
8
+
9
+ Permission is hereby granted, free of charge, to any person obtaining a copy
10
+ of this software and associated documentation files (the "Software"), to deal
11
+ in the Software without restriction, including without limitation the rights
12
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
13
+ copies of the Software, and to permit persons to whom the Software is
14
+ furnished to do so, subject to the following conditions:
15
+
16
+ The above copyright notice and this permission notice shall be included in all
17
+ copies or substantial portions of the Software.
18
+
19
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
20
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
21
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
22
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
23
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
24
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
25
+ SOFTWARE.
26
+ Keywords: duplicate,files,cli,disk,cleaner
27
+ Classifier: Programming Language :: Python :: 3
28
+ Classifier: License :: OSI Approved :: MIT License
29
+ Classifier: Operating System :: OS Independent
30
+ Classifier: Environment :: Console
31
+ Requires-Python: >=3.9
32
+ Description-Content-Type: text/markdown
33
+ License-File: LICENSE
34
+ Requires-Dist: rich>=13.0
35
+ Requires-Dist: click>=8.0
36
+ Dynamic: license-file
37
+
38
+ # dupegun
39
+
40
+ Fast cross-platform duplicate file finder and cleaner for Windows, Linux and macOS.
41
+
42
+ ## Install
43
+
44
+ ```bash
45
+ pip install dupegun
46
+ ```
47
+
48
+ ## Commands
49
+
50
+ ```bash
51
+ # Find duplicates
52
+ dupegun scan ~/Downloads
53
+
54
+ # Skip files under 1 MB
55
+ dupegun scan ~/Downloads --min-size 1000000
56
+
57
+ # Scan multiple folders
58
+ dupegun scan ~/Downloads ~/Documents ~/Desktop
59
+
60
+ # Export to JSON
61
+ dupegun scan ~/Downloads --json results.json
62
+
63
+ # Export to CSV
64
+ dupegun scan ~/Downloads --csv results.csv
65
+
66
+ # Preview what would be deleted (safe)
67
+ dupegun delete ~/Downloads --strategy newest
68
+
69
+ # Actually delete
70
+ dupegun delete ~/Downloads --strategy newest --no-dry-run
71
+
72
+ # Confirm each group before deleting
73
+ dupegun delete ~/Downloads --no-dry-run --interactive
74
+
75
+ # Move duplicates to quarantine
76
+ dupegun move ~/Downloads --dest ~/quarantine --no-dry-run
77
+
78
+ # Replace duplicates with hard links
79
+ dupegun hardlink ~/Downloads --no-dry-run
80
+ ```
81
+
82
+ ## Strategies
83
+
84
+ | Flag | Keeps |
85
+ |---|---|
86
+ | `--strategy shortest` | Shortest file path (default) |
87
+ | `--strategy newest` | Most recently modified copy |
88
+ | `--strategy oldest` | Oldest copy |
89
+
90
+ ## Features
91
+
92
+ - Works on Windows, Linux, macOS
93
+ - All file types supported
94
+ - 3-pass engine (size → partial hash → full SHA-256)
95
+ - Colored terminal output
96
+ - Dry-run on by default (safe)
97
+ - JSON and CSV export
98
+ - Hard link support
99
+
100
+ ## License
101
+
102
+ MIT
@@ -0,0 +1,65 @@
1
+ # dupegun
2
+
3
+ Fast cross-platform duplicate file finder and cleaner for Windows, Linux and macOS.
4
+
5
+ ## Install
6
+
7
+ ```bash
8
+ pip install dupegun
9
+ ```
10
+
11
+ ## Commands
12
+
13
+ ```bash
14
+ # Find duplicates
15
+ dupegun scan ~/Downloads
16
+
17
+ # Skip files under 1 MB
18
+ dupegun scan ~/Downloads --min-size 1000000
19
+
20
+ # Scan multiple folders
21
+ dupegun scan ~/Downloads ~/Documents ~/Desktop
22
+
23
+ # Export to JSON
24
+ dupegun scan ~/Downloads --json results.json
25
+
26
+ # Export to CSV
27
+ dupegun scan ~/Downloads --csv results.csv
28
+
29
+ # Preview what would be deleted (safe)
30
+ dupegun delete ~/Downloads --strategy newest
31
+
32
+ # Actually delete
33
+ dupegun delete ~/Downloads --strategy newest --no-dry-run
34
+
35
+ # Confirm each group before deleting
36
+ dupegun delete ~/Downloads --no-dry-run --interactive
37
+
38
+ # Move duplicates to quarantine
39
+ dupegun move ~/Downloads --dest ~/quarantine --no-dry-run
40
+
41
+ # Replace duplicates with hard links
42
+ dupegun hardlink ~/Downloads --no-dry-run
43
+ ```
44
+
45
+ ## Strategies
46
+
47
+ | Flag | Keeps |
48
+ |---|---|
49
+ | `--strategy shortest` | Shortest file path (default) |
50
+ | `--strategy newest` | Most recently modified copy |
51
+ | `--strategy oldest` | Oldest copy |
52
+
53
+ ## Features
54
+
55
+ - Works on Windows, Linux, macOS
56
+ - All file types supported
57
+ - 3-pass engine (size → partial hash → full SHA-256)
58
+ - Colored terminal output
59
+ - Dry-run on by default (safe)
60
+ - JSON and CSV export
61
+ - Hard link support
62
+
63
+ ## License
64
+
65
+ MIT
File without changes
@@ -0,0 +1,103 @@
1
+ import os
2
+ import shutil
3
+ from pathlib import Path
4
+ from rich.console import Console
5
+ from rich.prompt import Confirm
6
+ from .reporter import human_size
7
+
8
+ console = Console()
9
+
10
def pick_keeper(paths: list, strategy: str) -> Path:
    """Choose which path of a duplicate group survives.

    Args:
        paths: Candidate paths belonging to one duplicate group.
        strategy: ``"newest"`` keeps the largest modification time,
            ``"oldest"`` the smallest, ``"shortest"`` the path whose string
            form has the fewest characters. Any other value keeps the first
            entry of ``paths``.

    Returns:
        The selected path. Ties resolve to the earliest matching entry.
    """

    def modified_at(candidate):
        return candidate.stat().st_mtime

    def path_length(candidate):
        return len(str(candidate))

    # strategy name -> (reducer, sort key)
    selectors = {
        "newest": (max, modified_at),
        "oldest": (min, modified_at),
        "shortest": (min, path_length),
    }

    if strategy not in selectors:
        return paths[0]

    reducer, key_fn = selectors[strategy]
    return reducer(paths, key=key_fn)
18
+
19
def delete_dupes(
    groups: dict,
    strategy: str = "shortest",
    dry_run: bool = True,
    interactive: bool = False,
) -> None:
    """Delete every file of each duplicate group except the chosen keeper.

    Args:
        groups: Mapping of a group key to the list of paths in that group.
        strategy: Forwarded to ``pick_keeper`` to select the surviving copy.
        dry_run: When true, only print what would be deleted.
        interactive: When true, ask for confirmation before acting on each
            group; a declined group is skipped.

    A file that cannot be inspected or removed is reported and skipped; the
    remaining files are still processed.
    """
    total_freed = 0

    for paths in groups.values():
        keeper = pick_keeper(paths, strategy)
        to_delete = [p for p in paths if p != keeper]

        console.print(f"\n[bold]Keep:[/bold] [green]{keeper}[/green]")
        for p in to_delete:
            console.print(f"[bold]Delete:[/bold] [red]{p}[/red]")

        if interactive and not Confirm.ask(" Proceed with this group?"):
            continue

        for p in to_delete:
            if dry_run:
                console.print(f" [dim][DRY RUN] would delete {p}[/dim]")
                continue
            try:
                # stat() lives inside the try block: a file that vanished
                # since the scan must not abort the rest of the run.
                size = p.stat().st_size
                p.unlink()
            except OSError as e:
                console.print(f" [yellow]Error: {e}[/yellow]")
            else:
                total_freed += size
                console.print(f" [red]Deleted {p}[/red]")

    if not dry_run:
        console.print(
            f"\n[bold green]Freed {human_size(total_freed)}[/bold green]"
        )
55
+
56
def move_dupes(
    groups: dict,
    dest: Path,
    strategy: str = "shortest",
    dry_run: bool = True,
) -> None:
    """Move every non-keeper file of each duplicate group into ``dest``.

    Args:
        groups: Mapping of a group key (a string; its first 8 characters are
            used to disambiguate names) to the list of paths in that group.
        dest: Folder that receives the duplicates. It is created only when
            ``dry_run`` is false.
        strategy: Forwarded to ``pick_keeper`` to select the copy that stays.
        dry_run: When true, only print the planned moves.

    A move that fails is reported and skipped; the remaining files are still
    processed.
    """
    if not dry_run:
        # A preview must not touch the file system.
        dest.mkdir(parents=True, exist_ok=True)

    # Targets already handed out during this call. Tracking them keeps the
    # dry-run preview accurate and prevents two sources from being given the
    # same destination name.
    claimed = set()

    for hash_val, paths in groups.items():
        keeper = pick_keeper(paths, strategy)
        prefix = hash_val[:8]
        for p in paths:
            if p == keeper:
                continue

            target = dest / p.name
            attempt = 0
            while target in claimed or target.exists():
                attempt += 1
                # First fallback is "<hash8>_<name>"; later ones add a counter
                # so an existing file is never overwritten.
                label = prefix if attempt == 1 else f"{prefix}_{attempt}"
                target = dest / f"{label}_{p.name}"
            claimed.add(target)

            if dry_run:
                console.print(
                    f"[dim][DRY RUN] would move {p} → {target}[/dim]"
                )
                continue

            try:
                shutil.move(str(p), str(target))
            except OSError as e:
                console.print(f"[yellow]Error: {e}[/yellow]")
            else:
                console.print(f"[yellow]Moved {p} → {target}[/yellow]")
80
+
81
def hardlink_dupes(
    groups: dict,
    strategy: str = "shortest",
    dry_run: bool = True,
) -> None:
    """Replace every non-keeper file of each group with a hard link to the keeper.

    Args:
        groups: Mapping of a group key to the list of paths in that group.
        strategy: Forwarded to ``pick_keeper`` to select the link source.
        dry_run: When true, only print the planned links.

    The link is first created under a temporary name next to the duplicate
    and then renamed over it, so the duplicate is never removed unless the
    link could actually be created (linking fails, for example, across file
    systems). Failures are reported and the remaining files are still
    processed.
    """
    for paths in groups.values():
        keeper = pick_keeper(paths, strategy)
        for p in paths:
            if p == keeper:
                continue

            if dry_run:
                console.print(
                    f"[dim][DRY RUN] would hardlink {p} → {keeper}[/dim]"
                )
                continue

            staging = p.with_name(f"{p.name}.dupegun-{os.getpid()}.tmp")
            try:
                if os.path.samefile(keeper, p):
                    # Both names already refer to the same file.
                    console.print(
                        f"[dim]Already hardlinked {p} → {keeper}[/dim]"
                    )
                    continue
                os.link(keeper, staging)
            except OSError as e:
                # Nothing was created or removed; the duplicate is intact.
                console.print(f"[yellow]Error: {e}[/yellow]")
                continue

            try:
                os.replace(staging, p)
            except OSError as e:
                # Remove only the staging link that this call created.
                try:
                    staging.unlink()
                except OSError:
                    pass
                console.print(f"[yellow]Error: {e}[/yellow]")
            else:
                console.print(
                    f"[cyan]Hardlinked {p} → {keeper}[/cyan]"
                )
@@ -0,0 +1,145 @@
1
+ import click
2
+ from pathlib import Path
3
+ from rich.console import Console
4
+ from rich.progress import (
5
+ Progress, SpinnerColumn,
6
+ TextColumn, BarColumn, TaskProgressColumn
7
+ )
8
+
9
+ from .scanner import find_duplicates
10
+ from .reporter import print_table, export_json, export_csv
11
+ from .actions import delete_dupes, move_dupes, hardlink_dupes
12
+
13
+ console = Console()
14
+
15
def _scan(paths, min_size):
    """Run ``find_duplicates`` over ``paths`` behind a transient progress display.

    Args:
        paths: Iterable of path strings; each is converted to ``Path``.
        min_size: Forwarded to ``find_duplicates`` as ``min_size``.

    Returns:
        Whatever ``find_duplicates`` returns.
    """
    columns = (
        SpinnerColumn(),
        TextColumn("[progress.description]{task.description}"),
        BarColumn(),
        TaskProgressColumn(),
    )

    with Progress(*columns, transient=True) as progress:
        # total=None: the amount of work is unknown until hashing starts.
        task = progress.add_task("Scanning...", total=None)

        def report(done, total, path):
            progress.update(
                task,
                completed=done,
                total=total,
                description=f"Hashing [cyan]{path.name}[/cyan]",
            )

        return find_duplicates(
            [Path(entry) for entry in paths],
            min_size=min_size,
            progress_cb=report,
        )
32
+
33
# Root command group; the subcommands below attach to it via @main.command().
@click.group()
@click.version_option("1.0.0", prog_name="dupegun")
def main():
    """dupegun — find and destroy duplicate files.

    Works on Windows, Linux and macOS. All file types supported.
    """
41
+
42
@main.command()
@click.argument("paths", nargs=-1, required=True,
                type=click.Path(exists=True))
@click.option("--min-size", default=1,
              help="Minimum file size in bytes to scan (default: 1)")
@click.option("--json", "out_json", default=None,
              help="Export results to a JSON file")
@click.option("--csv", "out_csv", default=None,
              help="Export results to a CSV file")
def scan(paths, min_size, out_json, out_csv):
    """Scan folders and list all duplicate files."""
    console.print(f"\n[bold]dupegun[/bold] — scanning {len(paths)} path(s)...\n")
    groups = _scan(paths, min_size)

    if groups:
        print_table(groups)
    else:
        console.print("[bold green]No duplicates found![/bold green]")

    # Requested exports are written even when nothing was found, so the
    # output file always reflects this run instead of being left missing or
    # stale from an earlier scan.
    if out_json:
        export_json(groups, out_json)
    if out_csv:
        export_csv(groups, out_csv)
66
+
67
# `delete` subcommand: scans the given paths, then hands the duplicate groups
# to delete_dupes. Dry-run is the default, so nothing is removed unless
# --no-dry-run is passed.
@main.command()
@click.argument("paths", nargs=-1, required=True,
                type=click.Path(exists=True))
@click.option("--strategy", default="shortest",
              type=click.Choice(["shortest", "newest", "oldest"]),
              help="Which copy to keep (default: shortest path)")
@click.option("--dry-run/--no-dry-run", default=True,
              help="Preview without deleting (default: ON)")
@click.option("--interactive", is_flag=True,
              help="Confirm each group before deleting")
@click.option("--min-size", default=1,
              help="Minimum file size in bytes to scan (default: 1)")
def delete(paths, strategy, dry_run, interactive, min_size):
    """Delete duplicates, keeping one copy per group."""
    groups = _scan(paths, min_size)

    if not groups:
        console.print("[bold green]No duplicates found![/bold green]")
        return

    if dry_run:
        console.print(
            "[yellow]DRY RUN — nothing will be deleted. "
            "Use --no-dry-run to actually delete.[/yellow]\n"
        )

    delete_dupes(
        groups,
        strategy=strategy,
        dry_run=dry_run,
        interactive=interactive,
    )
98
+
99
# `move` subcommand: scans the given paths, then hands the duplicate groups to
# move_dupes. Dry-run is the default, so nothing is moved unless
# --no-dry-run is passed.
@main.command()
@click.argument("paths", nargs=-1, required=True,
                type=click.Path(exists=True))
@click.option("--dest", required=True,
              # Reject an existing regular file up front instead of failing
              # later when the destination folder is created.
              type=click.Path(file_okay=False),
              help="Destination folder to move duplicates into")
@click.option("--strategy", default="shortest",
              type=click.Choice(["shortest", "newest", "oldest"]),
              help="Which copy to keep (default: shortest path)")
@click.option("--dry-run/--no-dry-run", default=True,
              help="Preview without moving (default: ON)")
@click.option("--min-size", default=1,
              help="Minimum file size in bytes to scan (default: 1)")
def move(paths, dest, strategy, dry_run, min_size):
    """Move duplicates to a quarantine folder instead of deleting."""
    groups = _scan(paths, min_size)

    if not groups:
        console.print("[bold green]No duplicates found![/bold green]")
        return

    if dry_run:
        console.print(
            "[yellow]DRY RUN — nothing will be moved. "
            "Use --no-dry-run to actually move.[/yellow]\n"
        )

    move_dupes(groups, Path(dest), strategy=strategy, dry_run=dry_run)
123
+
124
# `hardlink` subcommand: scans the given paths, then hands the duplicate
# groups to hardlink_dupes. Dry-run is the default, so nothing is changed
# unless --no-dry-run is passed.
@main.command()
@click.argument("paths", nargs=-1, required=True,
                type=click.Path(exists=True))
@click.option("--strategy", default="shortest",
              type=click.Choice(["shortest", "newest", "oldest"]),
              help="Which copy to keep (default: shortest path)")
@click.option("--dry-run/--no-dry-run", default=True,
              help="Preview without hardlinking (default: ON)")
@click.option("--min-size", default=1,
              help="Minimum file size in bytes to scan (default: 1)")
def hardlink(paths, strategy, dry_run, min_size):
    """Replace duplicates with hard links to save space."""
    groups = _scan(paths, min_size)

    if not groups:
        console.print("[bold green]No duplicates found![/bold green]")
        return

    if dry_run:
        console.print(
            "[yellow]DRY RUN — nothing will be changed. "
            "Use --no-dry-run to actually hardlink.[/yellow]\n"
        )

    hardlink_dupes(groups, strategy=strategy, dry_run=dry_run)
@@ -0,0 +1,75 @@
1
+ import json
2
+ import csv
3
+ import datetime
4
+ from pathlib import Path
5
+ from rich.table import Table
6
+ from rich.console import Console
7
+
8
+ console = Console()
9
+
10
def human_size(n: int) -> str:
    """Format a byte count with one decimal place and a binary-scaled unit.

    The value is divided by 1024 until it drops below 1024 or the units
    B, KB, MB, GB, TB are exhausted; anything larger is expressed in PB.
    """
    units = ("B", "KB", "MB", "GB", "TB")
    value = n
    position = 0
    while position < len(units):
        if value < 1024:
            return f"{value:.1f} {units[position]}"
        value = value / 1024
        position += 1
    return f"{value:.1f} PB"
16
+
17
def print_table(groups: dict) -> None:
    """Print one table per duplicate group, followed by a reclaimable-space total.

    Args:
        groups: Mapping of a group key to the list of paths in that group.

    The size of a group is taken from its first path; the wasted space of a
    group is that size multiplied by the number of extra copies.
    """
    reclaimable = 0

    for number, members in enumerate(groups.values(), 1):
        each = members[0].stat().st_size
        extra = each * (len(members) - 1)
        reclaimable += extra

        heading = (
            f"[bold]Group {number}[/bold] — {human_size(each)} each | "
            f"[red]{human_size(extra)} wasted[/red]"
        )
        table = Table(title=heading, show_lines=True)
        table.add_column("#", style="dim", width=4)
        table.add_column("Path", style="cyan")
        table.add_column("Modified", style="yellow")
        table.add_column("Size", justify="right")

        for row, entry in enumerate(members, 1):
            info = entry.stat()
            modified = datetime.datetime.fromtimestamp(info.st_mtime)
            table.add_row(
                str(row),
                str(entry),
                modified.strftime("%Y-%m-%d %H:%M"),
                human_size(info.st_size),
            )

        console.print(table)

    summary = (
        f"\n[bold green]Total reclaimable:[/bold green] "
        f"[green]{human_size(reclaimable)}[/green] "
        f"across [bold]{len(groups)}[/bold] duplicate group(s)\n"
    )
    console.print(summary)
49
+
50
def export_json(groups: dict, out_path: str) -> None:
    """Write the duplicate groups to ``out_path`` as an indented JSON array.

    Each array element describes one group: its key (``hash``), the number of
    paths (``count``), the size of the first path (``size_each``) and the
    paths as strings (``files``).
    """
    records = []
    for digest, members in groups.items():
        records.append(
            {
                "hash": digest,
                "count": len(members),
                "size_each": members[0].stat().st_size,
                "files": list(map(str, members)),
            }
        )

    with open(out_path, "w") as handle:
        json.dump(records, handle, indent=2)

    console.print(f"[green]Exported JSON → {out_path}[/green]")
63
+
64
def export_csv(groups: dict, out_path: str) -> None:
    """Write the duplicate groups to ``out_path`` as CSV, one row per file.

    Columns: 1-based group number, group key, path, size in bytes and the
    modification time formatted as ``YYYY-MM-DD HH:MM`` (local time).

    Args:
        groups: Mapping of a group key to the list of paths in that group.
        out_path: Destination file; it is overwritten.
    """
    # Explicit UTF-8: with the platform default encoding a path containing
    # characters outside that encoding would raise UnicodeEncodeError.
    with open(out_path, "w", newline="", encoding="utf-8") as f:
        w = csv.writer(f)
        w.writerow(["group", "hash", "path", "size_bytes", "modified"])
        for i, (h, paths) in enumerate(groups.items(), 1):
            for p in paths:
                stat = p.stat()
                mtime = datetime.datetime.fromtimestamp(
                    stat.st_mtime
                ).strftime("%Y-%m-%d %H:%M")
                w.writerow([i, h, str(p), stat.st_size, mtime])
    console.print(f"[green]Exported CSV → {out_path}[/green]")
@@ -0,0 +1,72 @@
1
+ import os
2
+ import hashlib
3
+ from pathlib import Path
4
+ from collections import defaultdict
5
+ from typing import Iterator
6
+
7
# Number of bytes read per iteration while hashing a whole file.
CHUNK = 65_536


def _hash_file(path: Path) -> str:
    """Return the SHA-256 hex digest of the full content of ``path``.

    The file is read in ``CHUNK``-sized pieces, so its whole content is never
    held in memory at once.
    """
    digest = hashlib.sha256()
    with open(path, "rb") as stream:
        # iter() with a sentinel stops at the first empty read (end of file).
        for piece in iter(lambda: stream.read(CHUNK), b""):
            digest.update(piece)
    return digest.hexdigest()
15
+
16
def _partial_hash(path: Path, size: int = 4096) -> str:
    """Return the SHA-256 hex digest of the first ``size`` bytes of ``path``.

    A single ``read(size)`` is issued; a file shorter than ``size`` is hashed
    in full.
    """
    with open(path, "rb") as stream:
        head = stream.read(size)
    return hashlib.sha256(head).hexdigest()
21
+
22
def walk_files(root: Path, min_size: int = 1) -> Iterator[Path]:
    """Yield every regular file below ``root`` that is at least ``min_size`` bytes.

    Args:
        root: Directory to walk recursively with ``os.walk``.
        min_size: Minimum size in bytes; smaller files are skipped.

    Symbolic links are skipped: ``stat()`` follows links, so a link and its
    target would otherwise look like two identical files, and removing the
    target would leave only a dangling link. Entries that cannot be
    inspected (permission errors, files that vanish mid-walk) are skipped
    silently.
    """
    for dirpath, _, filenames in os.walk(root):
        folder = Path(dirpath)
        for name in filenames:
            p = folder / name
            try:
                if p.is_symlink():
                    continue
                size = p.stat().st_size
            except OSError:
                # Best-effort walk: unreadable entries are simply ignored.
                continue
            if size >= min_size:
                yield p
31
+
32
def find_duplicates(
    roots: list,
    min_size: int = 1,
    progress_cb=None
) -> dict:
    """Find files with identical content below the given roots.

    Candidates are narrowed in three passes: group by size, then by size plus
    a hash of the leading bytes, then by the hash of the full content.

    Args:
        roots: Directories to scan; each is passed to ``walk_files``.
        min_size: Minimum file size in bytes, passed to ``walk_files``.
        progress_cb: Optional callable invoked as
            ``progress_cb(done, total, path)`` before each full-content hash.

    Returns:
        Mapping of full-content hash to the list of paths sharing it; only
        groups with more than one path are included.

    Files that disappear or cannot be read during the scan are skipped.
    """
    by_size = defaultdict(list)
    for root in roots:
        for path in walk_files(root, min_size):
            try:
                by_size[path.stat().st_size].append(path)
            except OSError:
                # The file vanished or became unreadable since the walk.
                continue

    size_candidates = []
    for size, files in by_size.items():
        if len(files) < 2:
            continue
        # Overlapping or repeated roots can yield the same file under
        # several spellings. Collapse them by real path so a file is never
        # reported (and later removed) as a duplicate of itself.
        unique = {}
        for path in files:
            unique.setdefault(os.path.realpath(path), path)
        if len(unique) > 1:
            size_candidates.extend((size, path) for path in unique.values())

    by_partial = defaultdict(list)
    for size, path in size_candidates:
        try:
            # Keying on size as well keeps files of different lengths that
            # merely share a prefix out of the expensive full-hash pass.
            by_partial[(size, _partial_hash(path))].append(path)
        except OSError:
            pass

    partial_candidates = [
        p for files in by_partial.values()
        if len(files) > 1
        for p in files
    ]

    by_hash = defaultdict(list)
    total = len(partial_candidates)
    for i, path in enumerate(partial_candidates):
        if progress_cb:
            progress_cb(i + 1, total, path)
        try:
            by_hash[_hash_file(path)].append(path)
        except OSError:
            pass

    return {h: paths for h, paths in by_hash.items() if len(paths) > 1}
@@ -0,0 +1,102 @@
1
+ Metadata-Version: 2.4
2
+ Name: dupegun
3
+ Version: 1.0.0
4
+ Summary: Cross-platform duplicate file finder and cleaner
5
+ License: MIT License
6
+
7
+ Copyright (c) 2025 Prasanna B
8
+
9
+ Permission is hereby granted, free of charge, to any person obtaining a copy
10
+ of this software and associated documentation files (the "Software"), to deal
11
+ in the Software without restriction, including without limitation the rights
12
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
13
+ copies of the Software, and to permit persons to whom the Software is
14
+ furnished to do so, subject to the following conditions:
15
+
16
+ The above copyright notice and this permission notice shall be included in all
17
+ copies or substantial portions of the Software.
18
+
19
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
20
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
21
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
22
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
23
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
24
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
25
+ SOFTWARE.
26
+ Keywords: duplicate,files,cli,disk,cleaner
27
+ Classifier: Programming Language :: Python :: 3
28
+ Classifier: License :: OSI Approved :: MIT License
29
+ Classifier: Operating System :: OS Independent
30
+ Classifier: Environment :: Console
31
+ Requires-Python: >=3.9
32
+ Description-Content-Type: text/markdown
33
+ License-File: LICENSE
34
+ Requires-Dist: rich>=13.0
35
+ Requires-Dist: click>=8.0
36
+ Dynamic: license-file
37
+
38
+ # dupegun
39
+
40
+ Fast cross-platform duplicate file finder and cleaner for Windows, Linux and macOS.
41
+
42
+ ## Install
43
+
44
+ ```bash
45
+ pip install dupegun
46
+ ```
47
+
48
+ ## Commands
49
+
50
+ ```bash
51
+ # Find duplicates
52
+ dupegun scan ~/Downloads
53
+
54
+ # Skip files under 1 MB
55
+ dupegun scan ~/Downloads --min-size 1000000
56
+
57
+ # Scan multiple folders
58
+ dupegun scan ~/Downloads ~/Documents ~/Desktop
59
+
60
+ # Export to JSON
61
+ dupegun scan ~/Downloads --json results.json
62
+
63
+ # Export to CSV
64
+ dupegun scan ~/Downloads --csv results.csv
65
+
66
+ # Preview what would be deleted (safe)
67
+ dupegun delete ~/Downloads --strategy newest
68
+
69
+ # Actually delete
70
+ dupegun delete ~/Downloads --strategy newest --no-dry-run
71
+
72
+ # Confirm each group before deleting
73
+ dupegun delete ~/Downloads --no-dry-run --interactive
74
+
75
+ # Move duplicates to quarantine
76
+ dupegun move ~/Downloads --dest ~/quarantine --no-dry-run
77
+
78
+ # Replace duplicates with hard links
79
+ dupegun hardlink ~/Downloads --no-dry-run
80
+ ```
81
+
82
+ ## Strategies
83
+
84
+ | Flag | Keeps |
85
+ |---|---|
86
+ | `--strategy shortest` | Shortest file path (default) |
87
+ | `--strategy newest` | Most recently modified copy |
88
+ | `--strategy oldest` | Oldest copy |
89
+
90
+ ## Features
91
+
92
+ - Works on Windows, Linux, macOS
93
+ - All file types supported
94
+ - 3-pass engine (size → partial hash → full SHA-256)
95
+ - Colored terminal output
96
+ - Dry-run on by default (safe)
97
+ - JSON and CSV export
98
+ - Hard link support
99
+
100
+ ## License
101
+
102
+ MIT
@@ -0,0 +1,15 @@
1
+ LICENSE
2
+ README.md
3
+ pyproject.toml
4
+ dupegun/__init__.py
5
+ dupegun/actions.py
6
+ dupegun/cli.py
7
+ dupegun/reporter.py
8
+ dupegun/scanner.py
9
+ dupegun.egg-info/PKG-INFO
10
+ dupegun.egg-info/SOURCES.txt
11
+ dupegun.egg-info/dependency_links.txt
12
+ dupegun.egg-info/entry_points.txt
13
+ dupegun.egg-info/requires.txt
14
+ dupegun.egg-info/top_level.txt
15
+ tests/test_scanner.py
@@ -0,0 +1,2 @@
1
+ [console_scripts]
2
+ dupegun = dupegun.cli:main
@@ -0,0 +1,2 @@
1
+ rich>=13.0
2
+ click>=8.0
@@ -0,0 +1 @@
1
+ dupegun
@@ -0,0 +1,25 @@
1
+ [build-system]
2
+ requires = ["setuptools>=68", "wheel"]
3
+ build-backend = "setuptools.build_meta"
4
+
5
+ [project]
6
+ name = "dupegun"
7
+ version = "1.0.0"
8
+ description = "Cross-platform duplicate file finder and cleaner"
9
+ readme = "README.md"
10
+ license = {file = "LICENSE"}
11
+ requires-python = ">=3.9"
12
+ keywords = ["duplicate", "files", "cli", "disk", "cleaner"]
13
+ classifiers = [
14
+ "Programming Language :: Python :: 3",
15
+ "License :: OSI Approved :: MIT License",
16
+ "Operating System :: OS Independent",
17
+ "Environment :: Console",
18
+ ]
19
+ dependencies = [
20
+ "rich>=13.0",
21
+ "click>=8.0",
22
+ ]
23
+
24
+ [project.scripts]
25
+ dupegun = "dupegun.cli:main"
@@ -0,0 +1,4 @@
1
+ [egg_info]
2
+ tag_build =
3
+ tag_date = 0
4
+
@@ -0,0 +1,25 @@
1
+ import os
2
+ import tempfile
3
+ from pathlib import Path
4
+ from dupegun.scanner import find_duplicates
5
+
6
def test_finds_duplicates():
    """Two files with equal content form exactly one group; the unique file is excluded."""
    with tempfile.TemporaryDirectory() as tmp:
        a = Path(tmp) / "a.txt"
        b = Path(tmp) / "b.txt"
        c = Path(tmp) / "c.txt"
        a.write_text("hello duplicate")
        b.write_text("hello duplicate")
        c.write_text("unique content")

        groups = find_duplicates([Path(tmp)])
        assert len(groups) == 1
        paths = list(groups.values())[0]
        assert len(paths) == 2
        # Check membership too: counting alone would also pass if the unique
        # file had been grouped with one of the copies.
        assert set(paths) == {a, b}
19
+
20
def test_no_duplicates():
    """Two same-sized files with different content must not be grouped."""
    with tempfile.TemporaryDirectory() as tmp:
        root = Path(tmp)
        for name, text in (("x.txt", "aaa"), ("y.txt", "bbb")):
            Path(root, name).write_text(text)
        found = find_duplicates([root])
        assert len(found) == 0