onlyone-2.4.3-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
onlyone/__init__.py ADDED
@@ -0,0 +1,43 @@
1
"""
OnlyOne — fast duplicate file finder with optional GUI.

Core features:
- Three deduplication modes: FAST (size + front hash), NORMAL (size + 3 partial hashes), FULL (size + full content hash)
- Safe deletion to system trash (via send2trash)
- Optional GUI with PySide6 (install with [gui] extra)
- CLI interface for headless/server usage
"""

# Resolve the package version:
# 1) Installed package: read it from the distribution metadata.
# 2) Development checkout (package not installed): fall back to pyproject.toml.
#    The file is located relative to THIS file, not the current working
#    directory, so the fallback works regardless of where Python is launched.
# PackageNotFoundError subclasses ModuleNotFoundError -> ImportError, so a
# single `except ImportError` covers both "importlib.metadata missing" and
# "distribution not installed" without swallowing unrelated errors.
try:
    from importlib.metadata import version as _version

    __version__ = _version("onlyone")
except ImportError:
    try:
        import tomllib  # Python 3.11+
    except ImportError:
        import tomli as tomllib  # Python < 3.11: pip install tomli

    from pathlib import Path as _Path

    _pyproject = _Path(__file__).resolve().parent.parent / "pyproject.toml"
    with open(_pyproject, "rb") as f:
        __version__ = tomllib.load(f)["project"]["version"]

# Public API — only what users should import directly
from onlyone.commands import DeduplicationCommand
from onlyone.core import DeduplicationParams, DeduplicationMode, SortOrder, File, DuplicateGroup
from onlyone.utils.convert_utils import ConvertUtils
from onlyone.services import DuplicateService
from onlyone.services.file_service import FileService

__all__ = [
    "DeduplicationCommand",
    "DeduplicationParams",
    "DeduplicationMode",
    "SortOrder",
    "File",
    "DuplicateGroup",
    "ConvertUtils",
    "DuplicateService",
    "FileService",
    "__version__",
]
onlyone/cli.py ADDED
@@ -0,0 +1,483 @@
1
#!/usr/bin/env python3
"""
OnlyOne CLI — Command line interface for duplicate file detection and removal.
Implements the same core engine as GUI but with console-based interaction.
All operations are safe: deletion moves files to system trash, never permanent erase.
"""
from __future__ import annotations  # Enable postponed evaluation of annotations (PEP 563)

import argparse
import logging
import os
import sys
import time
from pathlib import Path
from typing import List, Optional, NoReturn

logging.basicConfig(
    level=logging.ERROR,
    format="%(levelname)-8s | %(name)-25s | %(message)s"
)

# === EARLY DEPENDENCY VALIDATION ===
# Probe each hard dependency up front so the user gets one actionable
# message instead of a traceback from a deeply nested import.
_MISSING_DEPS = []

try:
    from send2trash import send2trash
except ImportError:
    _MISSING_DEPS.append("send2trash")

try:
    import xxhash
except ImportError:
    _MISSING_DEPS.append("xxhash")

if _MISSING_DEPS:
    print("❌ Missing required dependencies:", file=sys.stderr)
    print(f" pip install {' '.join(_MISSING_DEPS)}", file=sys.stderr)
    print("\nOr install all dependencies:", file=sys.stderr)
    print(" pip install -r requirements.txt", file=sys.stderr)
    sys.exit(1)

# === NORMAL IMPORTS (after validation) ===
from onlyone.core.models import DeduplicationMode, DeduplicationParams, DuplicateGroup, SortOrder
from onlyone.commands import DeduplicationCommand
from onlyone.utils.convert_utils import ConvertUtils
from onlyone.services.file_service import FileService
from onlyone.services.duplicate_service import DuplicateService
48
+ class CLIApplication:
49
+ """Main CLI application controller."""
50
+
51
+ def __init__(self):
52
+ self.start_time: float = time.time()
53
+ self.verbose: bool = False
54
+ self.quiet: bool = False
55
+
56
+ @staticmethod
57
+ def parse_args() -> argparse.Namespace:
58
+ """Parse and validate command-line arguments."""
59
+ parser = argparse.ArgumentParser(
60
+ description="OnlyOne — Fast duplicate file finder with safe deletion",
61
+ formatter_class=argparse.RawDescriptionHelpFormatter,
62
+ epilog="""
63
+ Examples:
64
+ # Basic usage - find duplicates in Downloads folder
65
+ %(prog)s -i ~/Downloads
66
+
67
+ # Filter files by size and extensions and find duplicates
68
+ %(prog)s -i .~/Downloads -m 500KB -M 10MB -x .jpg,.png
69
+
70
+ # Same as above + move duplicates to trash (with confirmation prompt)
71
+ %(prog)s -i .~/Downloads -m 500KB -M 10MB -x .jpg,.png --keep-one
72
+
73
+ # Same as above but without confirmation and with output to a file (for scripts)
74
+ %(prog)s -i .~/Downloads -m 500KB -M 10MB -x .jpg,.png --keep-one --force > ~/Downloads/report.txt
75
+
76
+ # For more information check official OnlyOne github page
77
+ """
78
+ )
79
+
80
+ # Required arguments
81
+ parser.add_argument(
82
+ "--input", "-i",
83
+ required=True,
84
+ type=str,
85
+ help="Input directory to scan for duplicates"
86
+ )
87
+
88
+ # Filtering options
89
+ parser.add_argument(
90
+ "--min-size", "-m",
91
+ default="0",
92
+ type=str,
93
+ metavar='',
94
+ help="Minimum file size (e.g., 500KB, 1MB). Default: 0"
95
+ )
96
+ parser.add_argument(
97
+ "--max-size", "-M",
98
+ default="100GB",
99
+ type=str,
100
+ metavar='',
101
+ help="Maximum file size (e.g., 10MB, 1GB). Default: 100GB"
102
+ )
103
+ parser.add_argument(
104
+ "--extensions", "-x",
105
+ default="",
106
+ type=str,
107
+ metavar='',
108
+ help="Comma-separated file extensions to include (e.g., .jpg,.png)"
109
+ )
110
+ parser.add_argument(
111
+ "--priority-dirs", '-p',
112
+ nargs="+",
113
+ default=[],
114
+ type=str,
115
+ metavar='',
116
+ dest="priority_dirs",
117
+ help="Directories with files to prioritize when deleting duplicates"
118
+ )
119
+
120
+ # Deduplication options
121
+ parser.add_argument(
122
+ "--mode",
123
+ choices=["fast", "normal", "full"],
124
+ default="normal",
125
+ type=str,
126
+ help="Deduplication mode: "
127
+ "fast (size + front hash), "
128
+ "normal (size + front/middle/end hashes), "
129
+ "full (size + full content hash). Default: normal"
130
+ )
131
+ parser.add_argument(
132
+ "--sort",
133
+ choices=["shortest-path", "shortest-filename"],
134
+ default="shortest-path",
135
+ type=str,
136
+ help="Sorting inside duplicate groups: "
137
+ "'shortest-path' (files closer to root first), "
138
+ "'shortest-filename' (shorter filenames first). Default: shortest-path"
139
+ )
140
+
141
+ # Actions
142
+ parser.add_argument(
143
+ "--keep-one",
144
+ action="store_true",
145
+ help="Keep one file per duplicate group and move the rest to trash. "
146
+ "Always shows preview before deletion for safety."
147
+ )
148
+
149
+ # Output options
150
+ parser.add_argument(
151
+ "--force",
152
+ action="store_true",
153
+ help="Skip confirmation prompt when used with --keep-one (for automation/scripts)"
154
+ )
155
+ parser.add_argument(
156
+ "--quiet", "-q",
157
+ action="store_true",
158
+ help="Suppress non-essential output"
159
+ )
160
+ parser.add_argument(
161
+ "--verbose", "-v",
162
+ action="store_true",
163
+ help="Show detailed statistics and progress"
164
+ )
165
+
166
+ return parser.parse_args()
167
+
168
+ def validate_args(self, args: argparse.Namespace) -> None:
169
+ """Validate command-line arguments before execution."""
170
+ if args.force and not args.keep_one:
171
+ self.error_exit("--force can only be used with --keep-one")
172
+
173
+ # Prevent interactive confirmation in non-TTY environments
174
+ if args.keep_one and not args.force:
175
+ if not sys.stdin.isatty() or not sys.stdout.isatty():
176
+ self.error_exit(
177
+ "Cannot request interactive confirmation in non-interactive session.\n"
178
+ "Use --force flag to proceed without confirmation when piping output or running in scripts."
179
+ )
180
+
181
+ root_path = Path(args.input).resolve()
182
+ if not root_path.exists():
183
+ self.error_exit(f"Directory not found: {args.input}")
184
+ if not root_path.is_dir():
185
+ self.error_exit(f"Path is not a directory: {args.input}")
186
+
187
+ # Validate size formats
188
+ try:
189
+ min_size = ConvertUtils.human_to_bytes(args.min_size)
190
+ max_size = ConvertUtils.human_to_bytes(args.max_size)
191
+ if min_size < 0:
192
+ self.error_exit("Minimum size cannot be negative")
193
+ if max_size < min_size:
194
+ self.error_exit("Maximum size cannot be less than minimum size")
195
+ except ValueError as e:
196
+ self.error_exit(f"Invalid size format: {e}")
197
+
198
+ # Validate favourite directories
199
+ for fav_dir in args.priority_dirs:
200
+ fav_path = Path(fav_dir).resolve()
201
+ if not fav_path.exists():
202
+ self.warning(f"Favourite directory not found: {fav_dir}")
203
+ elif not fav_path.is_dir():
204
+ self.warning(f"Favourite path is not a directory: {fav_dir}")
205
+
206
+ def create_params(self, args: argparse.Namespace) -> DeduplicationParams:
207
+ """Create DeduplicationParams from CLI arguments."""
208
+ try:
209
+ min_size_bytes = ConvertUtils.human_to_bytes(args.min_size)
210
+ max_size_bytes = ConvertUtils.human_to_bytes(args.max_size)
211
+
212
+ # Parse extensions
213
+ extensions = []
214
+ if args.extensions:
215
+ extensions = [
216
+ ext.strip().lower()
217
+ for ext in args.extensions.split(",")
218
+ if ext.strip()
219
+ ]
220
+ extensions = [
221
+ ext if ext.startswith(".") else f".{ext}"
222
+ for ext in extensions
223
+ ]
224
+
225
+ # Parse priority directories from CLI: support space-separated AND comma-separated
226
+ # convert it to internal favourite_dirs format for core engine
227
+ favourite_dirs = []
228
+ for item in args.priority_dirs:
229
+ favourite_dirs.extend([d.strip() for d in item.split(",") if d.strip()])
230
+ favourite_dirs = [str(Path(d).resolve()) for d in favourite_dirs]
231
+
232
+ # Map CLI sort option directly to core SortOrder enum values
233
+ sort_order = SortOrder(args.sort)
234
+
235
+ mode = DeduplicationMode[args.mode.upper()]
236
+
237
+ return DeduplicationParams(
238
+ root_dir=str(Path(args.input).resolve()),
239
+ min_size_bytes=min_size_bytes,
240
+ max_size_bytes=max_size_bytes,
241
+ extensions=extensions,
242
+ favourite_dirs=favourite_dirs,
243
+ sort_order=sort_order,
244
+ mode=mode
245
+ )
246
+ except ValueError as e:
247
+ self.error_exit(f"Parameter error: {e}")
248
+
249
+ def progress_callback(self, stage: str, current: int, total: Optional[int]) -> None:
250
+ """CLI progress callback - shows progress in console."""
251
+ if not self.verbose:
252
+ return
253
+
254
+ if total and total > 0:
255
+ percent = (current / total) * 100
256
+ sys.stderr.write(
257
+ f"\r [{stage}] {current}/{total} ({percent:.1f}%)"
258
+ )
259
+ sys.stderr.flush()
260
+ else:
261
+ sys.stderr.write(f"\r [{stage}] {current} files processed...")
262
+ sys.stderr.flush()
263
+
264
+ @staticmethod
265
+ def stopped_flag() -> bool:
266
+ """Check if operation should stop (placeholder for signal handling)."""
267
+ return False
268
+
269
+ @staticmethod
270
+ def calculate_space_savings(groups: List[DuplicateGroup], files_to_delete: List[str]) -> int:
271
+ """Calculate total space that would be freed by deleting files."""
272
+ total_bytes = 0
273
+ delete_set = set(files_to_delete)
274
+ for group in groups:
275
+ for file in group.files:
276
+ if file.path in delete_set:
277
+ total_bytes += file.size
278
+ return total_bytes
279
+
280
+ def run_deduplication(self, params: DeduplicationParams) -> List[DuplicateGroup]:
281
+ """Execute deduplication workflow."""
282
+ command = DeduplicationCommand()
283
+ if self.verbose:
284
+ mode_display = params.mode.value.capitalize()
285
+ print(f"Finding duplicates (mode: {mode_display})...")
286
+
287
+ try:
288
+ groups, stats = command.execute(
289
+ params,
290
+ progress_callback=self.progress_callback if self.verbose else None,
291
+ stopped_flag=self.stopped_flag
292
+ )
293
+
294
+ if self.verbose:
295
+ sys.stderr.write("\n")
296
+ print("\nDeduplication Statistics:")
297
+ print(stats.print_summary())
298
+
299
+ return groups
300
+ except Exception as e:
301
+ self.error_exit(f"Deduplication failed: {e}")
302
+
303
+ def output_results(self, groups: List[DuplicateGroup]) -> None:
304
+ """Output duplicate groups as plain text without additional sorting."""
305
+ if self.quiet:
306
+ return
307
+
308
+ if not groups:
309
+ print("No duplicate groups found.")
310
+ return
311
+
312
+ total_files = sum(len(g.files) for g in groups)
313
+ print(f"\nFound {len(groups)} duplicate groups ({total_files} files)")
314
+
315
+ for idx, group in enumerate(groups, 1):
316
+ size_str = ConvertUtils.bytes_to_human(group.size)
317
+ print(f"\n📁 Group {idx} | Size: {size_str} | Files: {len(group.files)}")
318
+
319
+ # Use order from core (already sorted by favourite dirs + sort_order)
320
+ for file in group.files:
321
+ fav_marker = " ✅" if file.is_from_fav_dir else ""
322
+ print(f" {file.path} [{ConvertUtils.bytes_to_human(file.size)}]{fav_marker}")
323
+
324
+ def execute_keep_one(self, groups: List[DuplicateGroup], params: DeduplicationParams, force: bool = False) -> None:
325
+ """Keep one file per group, delete the rest. Always shows preview before deletion."""
326
+ if not groups:
327
+ if not self.quiet:
328
+ print("No duplicate groups found.")
329
+ return
330
+
331
+ files_to_delete, _ = DuplicateService.keep_only_one_file_per_group(groups)
332
+
333
+ if not files_to_delete:
334
+ if not self.quiet:
335
+ print("No files to delete (all groups already have only one file).")
336
+ return
337
+
338
+ # Calculate space savings
339
+ space_saved = self.calculate_space_savings(groups, files_to_delete)
340
+ space_saved_str = ConvertUtils.bytes_to_human(space_saved)
341
+
342
+ # Always show deletion preview before action (safety first)
343
+ print()
344
+ preserved = len(groups)
345
+ for idx, group in enumerate(groups, 1):
346
+ size_str = ConvertUtils.bytes_to_human(group.size)
347
+ print(f"📁 Group {idx} | Total size: {size_str} | Files: {len(group.files)}")
348
+ print("-" * 60)
349
+
350
+ # File that will be preserved (first file after core sorting)
351
+ preserved_file = group.files[0]
352
+ fav_marker = " ⭐" if preserved_file.is_from_fav_dir else ""
353
+ print(f" [KEEP] {preserved_file.path}")
354
+ print(f" Size: {ConvertUtils.bytes_to_human(preserved_file.size)}{fav_marker}")
355
+
356
+ if preserved_file.is_from_fav_dir:
357
+ print(f" Reason: from favourite directory")
358
+ else:
359
+ # Show human-readable sort reason based on actual enum value
360
+ sort_reason = "shortest path" if params.sort_order == SortOrder.SHORTEST_PATH else "shortest filename"
361
+ print(f" Reason: {sort_reason}")
362
+
363
+ # Files that would be deleted
364
+ for file in group.files[1:]:
365
+ fav_marker = " ⭐" if file.is_from_fav_dir else ""
366
+ print(f" [DEL] {file.path}")
367
+ print(f" Size: {ConvertUtils.bytes_to_human(file.size)}{fav_marker}")
368
+ print()
369
+
370
+ print("=" * 60)
371
+ print(f"Summary: Keep 1 file per group ({preserved} files preserved, {len(files_to_delete)} files deleted)")
372
+ print(f"Total space saved: {space_saved_str}")
373
+ print()
374
+
375
+ # Skip confirmation if --force is used
376
+ if force:
377
+ print("⚠️ WARNING: --force flag skips confirmation. Proceeding with deletion...")
378
+ else:
379
+ # Safety check: confirm we're still in interactive mode
380
+ if not sys.stdin.isatty() or not sys.stdout.isatty():
381
+ self.error_exit(
382
+ "Lost interactive terminal during operation. "
383
+ "Use --force to proceed in non-interactive environments."
384
+ )
385
+
386
+ # Ask for confirmation before actual deletion
387
+ response = input(f"Are you sure you want to move {len(files_to_delete)} files to trash? [y/N]: ")
388
+ if response.strip().lower() not in ("y", "yes"):
389
+ print("Deletion cancelled by user.")
390
+ return
391
+
392
+ # Execute deletion with error resilience (continue on individual file errors)
393
+ print(f"\nMoving {len(files_to_delete)} files to trash...")
394
+ deleted_count = 0
395
+ failed_files = []
396
+
397
+ try:
398
+ for i, path in enumerate(files_to_delete, 1):
399
+ if self.verbose:
400
+ print(f" [{i}/{len(files_to_delete)}] {os.path.basename(path)}")
401
+
402
+ try:
403
+ FileService.move_to_trash(path)
404
+ deleted_count += 1
405
+ except Exception as e:
406
+ failed_files.append((path, str(e)))
407
+ self.warning(f"Failed to delete {path}: {e}")
408
+ continue # Continue with next file
409
+
410
+ # Report results
411
+ if failed_files:
412
+ print(f"\n⚠️ Partial success: {deleted_count}/{len(files_to_delete)} files moved to trash.")
413
+ print(f"Failed to delete {len(failed_files)} file(s):")
414
+ for path, error in failed_files[:5]: # Show first 5 errors
415
+ print(f" • {os.path.basename(path)}: {error.split(':')[-1].strip()}")
416
+ if len(failed_files) > 5:
417
+ print(f" ...and {len(failed_files) - 5} more files")
418
+ else:
419
+ print(f"✅ Successfully moved {deleted_count} files to trash.")
420
+ print(f"Total space saved: {space_saved_str}")
421
+
422
+ except KeyboardInterrupt:
423
+ print("\n⚠️ Operation cancelled by user (Ctrl+C)")
424
+ sys.exit(130)
425
+ except Exception as e:
426
+ self.error_exit(f"Failed during deletion process: {e}")
427
+
428
+ def warning(self, message: str) -> None:
429
+ """Print a warning message to stderr."""
430
+ if not self.quiet:
431
+ print(f"⚠️ {message}", file=sys.stderr)
432
+
433
+ @staticmethod
434
+ def error_exit(message: str, code: int = 1) -> NoReturn:
435
+ """Print error and exit."""
436
+ print(f"❌ Error: {message}", file=sys.stderr)
437
+ sys.exit(code)
438
+
439
+ def run(self) -> None:
440
+ """Main entry point with conditional output behavior."""
441
+ args = self.parse_args()
442
+ self.verbose = args.verbose
443
+ self.quiet = args.quiet
444
+
445
+ self.validate_args(args)
446
+ params = self.create_params(args)
447
+
448
+ if not self.quiet:
449
+ print(f"Scanning directory: {params.root_dir}")
450
+
451
+ groups = self.run_deduplication(params)
452
+
453
+ # Conditional output based on flags
454
+ if args.keep_one:
455
+ # Always show preview before deletion (safety first)
456
+ self.execute_keep_one(groups, params=params, force=args.force)
457
+ else:
458
+ # Show standard duplicate groups list
459
+ self.output_results(groups)
460
+
461
+ # Show completion time
462
+ elapsed = time.time() - self.start_time
463
+ if self.verbose:
464
+ print(f"\n✅ Completed in {elapsed:.2f} seconds")
465
+
466
+
467
def main() -> None:
    """Application entry point: build the CLI app, run it, map failures to exit codes."""
    try:
        CLIApplication().run()
    except KeyboardInterrupt:
        # 130 = 128 + SIGINT, the conventional exit code for Ctrl+C.
        print("\n⚠️ Operation cancelled by user (Ctrl+C)")
        sys.exit(130)
    except Exception as exc:
        # With DEBUG set in the environment, re-raise for a full traceback.
        if os.environ.get("DEBUG"):
            raise
        print(f"❌ Unexpected error: {exc}", file=sys.stderr)
        sys.exit(1)


if __name__ == "__main__":
    main()
onlyone/commands.py ADDED
@@ -0,0 +1,92 @@
1
+ """
2
+ Unified command orchestrator for deduplication.
3
+ This is the SINGLE source of truth for business logic — used by both GUI and CLI.
4
+ No Qt/PySide6 dependencies — pure Python.
5
+ """
6
+ from typing import List, Optional, Callable, Tuple
7
+ from onlyone.core.models import DuplicateGroup, DeduplicationStats, DeduplicationParams, File
8
+ from onlyone.core.scanner import FileScannerImpl
9
+ from onlyone.core.deduplicator import DeduplicatorImpl
10
+
11
class DeduplicationCommand:
    """
    Single orchestration point for the deduplication workflow, shared by
    both GUI and CLI. Pure Python — no Qt/PySide6 dependencies.

    Workflow:
        1. Scan the root directory with FileScannerImpl (size/extension
           filters and favourite directories applied).
        2. Feed the scanned files to DeduplicatorImpl.find_duplicates.

    Both steps take an optional progress callback and a cancellation flag,
    so any frontend can plug in:

        # GUI (progress UI updates):
        command = DeduplicationCommand()
        groups, stats = command.execute(
            params,
            progress_callback=qt_progress_adapter,
            stopped_flag=qt_cancellation_check
        )

        # CLI (console progress):
        groups, stats = command.execute(
            params,
            progress_callback=cli_progress_printer,
            stopped_flag=signal_handler_check
        )
    """

    def __init__(self):
        self._deduplicator = DeduplicatorImpl()
        # Files collected by the last scan; local state, not in app/api layer.
        self._files: List[File] = []

    def execute(
        self,
        params: DeduplicationParams,
        progress_callback: Optional[Callable[[str, int, Optional[int]], None]] = None,
        stopped_flag: Optional[Callable[[], bool]] = None
    ) -> Tuple[List[DuplicateGroup], DeduplicationStats]:
        """
        Run a full scan + deduplication pass with the given parameters.

        Args:
            params: Validated deduplication parameters
            progress_callback: (stage: str, current: int, total: Optional[int]) -> None
            stopped_flag: () -> bool (returns True if operation should stop)

        Returns:
            Tuple of (duplicate_groups, statistics)

        Raises:
            RuntimeError: If no files match the configured filters
        """
        # Step 1: scan files using the core scanner directly.
        file_scanner = FileScannerImpl(
            root_dir=params.root_dir,
            min_size=params.min_size_bytes,
            max_size=params.max_size_bytes,
            extensions=params.extensions,
            favourite_dirs=params.favourite_dirs
        )
        collection = file_scanner.scan(
            stopped_flag=stopped_flag,
            progress_callback=progress_callback
        )
        self._files = collection.files

        if not self._files:
            raise RuntimeError("No files found matching filters")

        # Step 2: find duplicates using the core deduplicator directly.
        # `params` is the unified object carrying sort_order, mode, etc.
        return self._deduplicator.find_duplicates(
            self._files,
            params,
            stopped_flag=stopped_flag,
            progress_callback=progress_callback
        )

    def get_files(self) -> List[File]:
        """Return a copy of the files collected by the last execute() call."""
        # Copy so callers cannot mutate internal state.
        return list(self._files)
@@ -0,0 +1,38 @@
1
"""
Core deduplication engine — scanner, hasher, grouper, and pipeline orchestrator.

This package contains the performance-critical foundation of the onlyone:
- FileScannerImpl: recursive directory traversal with size/extension filters
- HasherImpl + XXHashAlgorithmImpl: xxHash64-based partial/full content hashing
- FileGrouperImpl: size and hash-based grouping with duplicate filtering
- Deduplicator: multi-stage pipeline (size → partial hashes → full hash)
- Models: File, DuplicateGroup, and configuration objects

All components are pure Python with no GUI dependencies — suitable for CLI and server usage.
"""

# Re-export the engine's building blocks at the package level.
from .scanner import FileScannerImpl
from .grouper import FileGrouperImpl
from .hasher import HasherImpl, XXHashAlgorithmImpl
from .deduplicator import Deduplicator, DeduplicatorImpl
from .sorter import Sorter
from .models import (
    File,
    DuplicateGroup,
    DeduplicationMode,
    DeduplicationParams,
    DeduplicationStats,
    SortOrder,
    FileHashes,
)

# Explicit public API of onlyone.core.
__all__ = [
    "FileScannerImpl",
    "FileGrouperImpl",
    "HasherImpl",
    "XXHashAlgorithmImpl",
    "Deduplicator",
    "DeduplicatorImpl",
    "File",
    "DuplicateGroup",
    "DeduplicationMode",
    "DeduplicationParams",
    "DeduplicationStats",
    "SortOrder",
    "FileHashes",
    "Sorter",
]
+ ]