pystou 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
cleanup/__init__.py ADDED
File without changes
cleanup/main.py ADDED
@@ -0,0 +1,310 @@
1
+ #!/usr/bin/env python3
2
+ """Cleanup subcommand for removing junk files from directories."""
3
+
4
+ import argparse
5
+ import logging
6
+ import os
7
+ import shutil
8
+ from pathlib import Path
9
+ from typing import Optional
10
+
11
+ from common.cli import add_common_arguments
12
+ from common.interrupt import scanning
13
+ from common.logger import log_configuration, setup_logging
14
+ from common.validation import validate_directory_or_exit
15
+
16
# Exact file names that are treated as junk by default.
# NOTE(review): ".Spotlight-V100" and ".Trashes" are usually directories on
# macOS volumes; as members of the *file* set they only match regular files.
# Confirm whether they were meant to live in JUNK_DIRS instead.
JUNK_FILES: set[str] = {
    # macOS
    ".DS_Store",
    "._.DS_Store",
    ".Spotlight-V100",
    ".Trashes",
    # Windows
    "Thumbs.db",
    "desktop.ini",
    "ehthumbs.db",
    "ehthumbs_vista.db",
}

# File-name prefixes that mark a file as junk (macOS resource forks).
JUNK_PREFIXES: set[str] = {"._"}

# Directory names that are treated as junk by default.
JUNK_DIRS: set[str] = {
    ".AppleDouble",
    ".LSOverride",
    ".TemporaryItems",
    ".fseventsd",
    "__MACOSX",
}
41
+
42
+
43
def add_cleanup_arguments(parser: argparse.ArgumentParser) -> None:
    """Register the cleanup subcommand's options on ``parser``.

    The shared options are registered first via ``add_common_arguments``;
    the cleanup-only ``--include`` and ``--list-only`` options follow.

    Args:
        parser: ArgumentParser that receives the arguments.
    """
    add_common_arguments(parser)

    # Repeatable option: each occurrence appends one more name to the list.
    include_help = "Additional file/directory names to remove (can be used multiple times)"
    parser.add_argument(
        "--include",
        action="append",
        type=str,
        metavar="PATTERN",
        help=include_help,
    )

    list_only_help = "Only list junk files without removing them"
    parser.add_argument("--list-only", action="store_true", help=list_only_help)
62
+
63
+
64
def main(args: Optional[argparse.Namespace] = None) -> None:
    """Main entry point for cleanup.

    Scans the target directory for junk items, prints and logs what was
    found, and removes the items unless ``--list-only`` or ``--dry-run``
    was requested.

    Args:
        args: Parsed arguments. If None, parses from command line.
    """
    if args is None:
        parser = argparse.ArgumentParser(description="Cleanup junk files script.")
        add_cleanup_arguments(parser)
        args = parser.parse_args()

    setup_logging("cleanup", args.log_dir)
    log_configuration(args)

    validate_directory_or_exit(args.directory)

    # Build the sets of names to match. --include is documented as taking
    # file *or* directory names, so every extra name extends both sets;
    # otherwise an included directory name would never be matched.
    junk_files = JUNK_FILES.copy()
    junk_dirs = JUNK_DIRS.copy()
    if args.include:
        junk_files.update(args.include)
        junk_dirs.update(args.include)

    # Find junk files
    with scanning("scan"):
        junk_items = find_junk(args.directory, args.recursive, junk_files, junk_dirs)

    if not junk_items:
        print("No junk files found.")
        logging.info({"action": "no_junk_found"})
        return

    print(f"Found {len(junk_items)} junk item(s):")
    for item in junk_items:
        print(f"  {item}")

    logging.info(
        {
            "action": "junk_found",
            "count": len(junk_items),
            "items": [str(i) for i in junk_items],
        }
    )

    # Listing and dry-run both stop before anything is deleted.
    if args.list_only:
        print("\n(Use without --list-only to remove)")
        return

    if args.dry_run:
        print("\nDry run: would remove the above items")
        logging.info({"action": "cleanup", "status": "dry_run"})
        return

    # Remove junk files
    with scanning("removal"):
        removed_count, skipped_count = remove_junk(junk_items)

    print(f"\nRemoved {removed_count}/{len(junk_items)} item(s)")
    if skipped_count > 0:
        print(f"Skipped {skipped_count} item(s) due to errors")
    logging.info(
        {
            "action": "cleanup_complete",
            "removed": removed_count,
            "skipped": skipped_count,
            "total": len(junk_items),
        }
    )
132
+
133
+
134
def find_junk(
    directory: str,
    recursive: bool,
    junk_files: set[str],
    junk_dirs: set[str],
) -> list[Path]:
    """Finds junk files and directories.

    Symlinks are never reported. In recursive mode a matched junk directory
    is reported as a whole and not descended into.

    Args:
        directory: Directory to search.
        recursive: Whether to search recursively.
        junk_files: Set of junk file names.
        junk_dirs: Set of junk directory names.

    Returns:
        List of paths to junk items.
    """
    junk_items: list[Path] = []
    directory_path = Path(directory)
    scanned = 0

    def _report_walk_error(error: OSError) -> None:
        # os.walk ignores directories it cannot list unless an onerror
        # callback is given; report them like the non-recursive branch does.
        failed = error.filename if error.filename is not None else directory_path
        if isinstance(error, PermissionError):
            print(f"Permission denied: {failed}")
        logging.warning({"action": "scan_error", "path": str(failed), "error": str(error)})

    if recursive:
        # followlinks=False prevents infinite loops from symlink cycles
        for root, dirs, files in os.walk(
            directory_path, followlinks=False, onerror=_report_walk_error
        ):
            root_path = Path(root)
            scanned += 1

            # Progress indicator every 1000 directories
            if scanned % 1000 == 0:
                print(f"Scanned {scanned} directories...", end="\r")

            # Check for junk directories; everything that is not junk stays
            # in the walk.
            remaining_dirs: list[str] = []
            for dir_name in dirs:
                dir_path = root_path / dir_name
                # Symlinked directories are never reported (os.walk does not
                # follow them either, because followlinks=False).
                if not dir_path.is_symlink() and dir_name in junk_dirs:
                    junk_items.append(dir_path)
                    continue
                remaining_dirs.append(dir_name)
            # In-place update so os.walk does not descend into junk dirs.
            dirs[:] = remaining_dirs

            # Check for junk files
            for file_name in files:
                file_path = root_path / file_name
                # Skip symlinks
                if file_path.is_symlink():
                    continue
                if is_junk_file(file_name, junk_files):
                    junk_items.append(file_path)
    else:
        try:
            # The context manager closes the scandir iterator promptly, even
            # if an entry check raises.
            with os.scandir(directory_path) as entries:
                for entry in entries:
                    # Skip symlinks
                    if entry.is_symlink():
                        continue
                    if (entry.is_dir(follow_symlinks=False) and entry.name in junk_dirs) or (
                        entry.is_file(follow_symlinks=False)
                        and is_junk_file(entry.name, junk_files)
                    ):
                        junk_items.append(Path(entry.path))
        except PermissionError as e:
            print(f"Permission denied: {directory_path}")
            logging.warning({"action": "scan_error", "path": str(directory_path), "error": str(e)})

    if scanned >= 1000:
        print(f"Scanned {scanned} directories. ")  # Clear progress line

    return junk_items
201
+
202
+
203
def is_junk_file(filename: str, junk_files: set[str]) -> bool:
    """Tell whether ``filename`` should be treated as junk.

    A name is junk when it is listed in ``junk_files`` or when it begins
    with one of the module-level ``JUNK_PREFIXES`` (e.g. ``._`` files).

    Args:
        filename: Name of the file.
        junk_files: Set of junk file names.

    Returns:
        True if the file is junk, False otherwise.
    """
    listed = filename in junk_files
    # The prefix check only runs when the exact-name lookup misses.
    return listed or filename.startswith(tuple(JUNK_PREFIXES))
218
+
219
+
220
def remove_junk(junk_items: list[Path]) -> tuple[int, int]:
    """Removes junk files and directories.

    Symlinks are unlinked without touching their target, directories are
    removed recursively, and everything else is unlinked. A failure on one
    item is reported and counted as skipped; the remaining items are still
    processed.

    Args:
        junk_items: List of paths to remove.

    Returns:
        Tuple of (removed_count, skipped_count).
    """
    removed = 0
    skipped = 0
    total = len(junk_items)

    for i, item in enumerate(junk_items, 1):
        # Progress indicator
        if total > 10 and i % 10 == 0:
            print(f"Removing {i}/{total}...", end="\r")

        try:
            # The symlink check must come first: Path.exists() follows
            # symlinks, so a dangling link would otherwise be reported as
            # "already deleted" and left behind.
            if item.is_symlink():
                # Don't follow symlinks, just remove the link
                item.unlink()
            elif not item.exists():
                # File was already deleted (race condition)
                logging.warning(
                    {
                        "action": "remove_junk",
                        "status": "already_deleted",
                        "path": str(item),
                    }
                )
                skipped += 1
                continue
            elif item.is_dir():
                shutil.rmtree(item)
            else:
                item.unlink()

            removed += 1
            logging.info(
                {
                    "action": "remove_junk",
                    "status": "success",
                    "path": str(item),
                }
            )

        except FileNotFoundError:
            # Race condition: file deleted between check and removal
            logging.warning(
                {
                    "action": "remove_junk",
                    "status": "not_found",
                    "path": str(item),
                }
            )
            skipped += 1

        except PermissionError as e:
            print(f"Permission denied: {item}")
            logging.error(
                {
                    "action": "remove_junk",
                    "status": "permission_denied",
                    "path": str(item),
                    "error": str(e),
                }
            )
            skipped += 1

        except OSError as e:
            # Catch-all for the remaining filesystem errors; the more
            # specific subclasses are handled above.
            print(f"Error removing {item}: {e}")
            logging.error(
                {
                    "action": "remove_junk",
                    "status": "error",
                    "path": str(item),
                    "error": str(e),
                }
            )
            skipped += 1

    if total > 10:
        print(f"Removed {removed}/{total} items. ")  # Clear progress line

    return removed, skipped
307
+
308
+
309
+ if __name__ == "__main__":
310
+ main()
common/__init__.py ADDED
File without changes
common/cli.py ADDED
@@ -0,0 +1,37 @@
1
+ import argparse
2
+
3
+
4
+ def add_common_arguments(parser: argparse.ArgumentParser) -> None:
5
+ """Adds common arguments to an ArgumentParser.
6
+
7
+ Args:
8
+ parser: ArgumentParser to add arguments to.
9
+ """
10
+ parser.add_argument(
11
+ "directory",
12
+ nargs="?",
13
+ default=".",
14
+ help="Directory to start from (default: current directory)",
15
+ )
16
+ parser.add_argument(
17
+ "-r",
18
+ "--recursive",
19
+ action="store_true",
20
+ help="Recursively process subdirectories",
21
+ )
22
+ parser.add_argument(
23
+ "-n",
24
+ "--dry-run",
25
+ action="store_true",
26
+ help="Perform a dry run (do not make any changes)",
27
+ )
28
+ parser.add_argument(
29
+ "--log-dir",
30
+ default=".",
31
+ help="Directory to store log files (default: current directory)",
32
+ )
33
+ parser.add_argument(
34
+ "--db-dir",
35
+ default=".",
36
+ help="Directory to store index database (default: current directory)",
37
+ )
common/cursor.py ADDED
@@ -0,0 +1,98 @@
1
+ """Cursor utilities for terminal progress display."""
2
+
3
+ import atexit
4
+ import contextlib
5
+ import signal
6
+ import sys
7
+ from typing import Optional
8
+
9
+ # ANSI escape codes for cursor control
10
+ HIDE_CURSOR = "\033[?25l"
11
+ SHOW_CURSOR = "\033[?25h"
12
+
13
+ # Track cursor state
14
+ _cursor_hidden = False
15
+ _original_sigint: Optional[signal.Handlers] = None
16
+ _original_sigterm: Optional[signal.Handlers] = None
17
+
18
+
19
def hide_cursor() -> None:
    """Hide the terminal cursor and arrange for it to be restored.

    Does nothing when the cursor is already hidden or when stdout is not a
    terminal. Otherwise it writes the hide sequence, registers
    ``show_cursor`` with ``atexit``, remembers the current SIGINT/SIGTERM
    handlers and installs ``_signal_handler`` for both signals.
    """
    global _cursor_hidden, _original_sigint, _original_sigterm

    # Nothing to do if already hidden, or if output is not a terminal.
    if _cursor_hidden or not sys.stdout.isatty():
        return

    out = sys.stdout
    out.write(HIDE_CURSOR)
    out.flush()
    _cursor_hidden = True

    # Normal interpreter exit restores the cursor.
    atexit.register(show_cursor)

    # Remember what was installed before so show_cursor can put it back,
    # then route both signals through our handler.
    _original_sigint = signal.getsignal(signal.SIGINT)
    _original_sigterm = signal.getsignal(signal.SIGTERM)
    for signum in (signal.SIGINT, signal.SIGTERM):
        signal.signal(signum, _signal_handler)
43
+
44
+
45
def show_cursor() -> None:
    """Show the terminal cursor again and undo ``hide_cursor``'s hooks.

    Does nothing when the cursor is not currently marked as hidden.
    Otherwise it writes the show sequence (only if stdout is a terminal),
    clears the hidden flag, unregisters itself from ``atexit`` and restores
    any saved SIGINT/SIGTERM handlers, clearing the saved values.
    """
    global _cursor_hidden, _original_sigint, _original_sigterm

    if not _cursor_hidden:
        return

    out = sys.stdout
    if out.isatty():
        out.write(SHOW_CURSOR)
        out.flush()

    _cursor_hidden = False

    # Best effort: failures while unhooking are deliberately ignored.
    with contextlib.suppress(Exception):
        atexit.unregister(show_cursor)

    # Put back whatever handlers were active before hide_cursor ran.
    saved_int = _original_sigint
    if saved_int is not None:
        with contextlib.suppress(Exception):
            signal.signal(signal.SIGINT, saved_int)
        _original_sigint = None

    saved_term = _original_sigterm
    if saved_term is not None:
        with contextlib.suppress(Exception):
            signal.signal(signal.SIGTERM, saved_term)
        _original_sigterm = None
73
+
74
+
75
def _signal_handler(signum: int, frame) -> None:
    """Restore the cursor, then hand the signal to the previous handler.

    Args:
        signum: Number of the signal being handled.
        frame: Frame object passed in by the signal machinery; forwarded
            unchanged to the previous handler.
    """
    # Look up the saved handler BEFORE show_cursor() clears the saved values.
    saved = {
        signal.SIGINT: _original_sigint,
        signal.SIGTERM: _original_sigterm,
    }.get(signum)
    previous = signal.SIG_DFL if saved is None else saved

    # Restore cursor (this clears _original_sigint/_original_sigterm)
    show_cursor()

    if previous == signal.SIG_DFL:
        # Emulate the default outcome: KeyboardInterrupt for SIGINT, a
        # 128+signum exit status for anything else.
        if signum == signal.SIGINT:
            raise KeyboardInterrupt
        sys.exit(128 + signum)

    # An ignored signal stays ignored; otherwise delegate.
    if previous != signal.SIG_IGN and callable(previous):
        previous(signum, frame)
common/errors.py ADDED
@@ -0,0 +1,9 @@
1
+ """Typed exceptions used to distinguish expected failures from bugs."""
2
+
3
+
4
class PystouError(Exception):
    """Root of the PyStou exception hierarchy.

    Used for expected, explained failures so they can be told apart from
    bugs.
    """
6
+
7
+
8
class InvalidDirectoryError(PystouError):
    """Signals that a target directory does not exist or is not a directory."""
common/fs_walker.py ADDED
@@ -0,0 +1,178 @@
1
+ import logging
2
+ import os
3
+ import sqlite3
4
+ from pathlib import Path
5
+ from typing import Optional
6
+
7
+
8
class ScanContext:
    """Counters for a filesystem scan, with throttled progress output.

    Tracks how many directories and files have been counted and refreshes
    the live output only once at least ``update_interval`` new entries
    have accumulated since the previous refresh.
    """

    __slots__ = ("_last_update", "dir_count", "file_count", "update_interval")

    def __init__(self, update_interval: int = 100):
        self.update_interval = update_interval
        # Both counters and the last-refresh marker start at zero.
        self.dir_count = self.file_count = self._last_update = 0

    def increment_dirs(self) -> None:
        """Count one more directory and refresh the output if due."""
        self.dir_count += 1
        self._maybe_update_output()

    def increment_files(self) -> None:
        """Count one more file and refresh the output if due."""
        self.file_count += 1
        self._maybe_update_output()

    def _maybe_update_output(self) -> None:
        """Refresh the live output when enough new entries were counted."""
        seen = self.dir_count + self.file_count
        if seen - self._last_update < self.update_interval:
            return
        self._last_update = seen
        update_live_output(self.dir_count, self.file_count)

    def final_update(self) -> None:
        """Print the final counts, then end the progress line."""
        update_live_output(self.dir_count, self.file_count)
        print()  # Newline after scanning complete
36
+
37
+
38
def collect_directories(
    conn: sqlite3.Connection,
    directory: str,
    recursive: bool,
    level: Optional[int] = None,
) -> None:
    """Rebuild the database contents from a fresh filesystem scan.

    Existing rows are cleared, the tree below ``directory`` is scanned into
    the database, and the final progress line is printed.

    Args:
        conn (sqlite3.Connection): SQLite database connection.
        directory (str): Directory to start scanning from.
        recursive (bool): Whether to scan directories recursively.
        level (Optional[int]): Maximum depth level for recursion (default: unlimited).
    """
    progress = ScanContext(update_interval=100)
    clear_database(conn)
    start = Path(directory)
    scan_tree(start, conn, recursive, level, progress)
    progress.final_update()
56
+
57
+
58
def scan_tree(
    root_dir: Path,
    conn: sqlite3.Connection,
    recursive: bool,
    level: Optional[int],
    ctx: ScanContext,
) -> None:
    """Walk a directory tree with an explicit stack and record what it finds.

    Each directory is listed with ``os.scandir``; its sub-directories and
    regular files (symlinks are not followed) are stored through
    ``insert_entries``. Using a stack instead of recursion avoids
    RecursionError on deep trees.

    Args:
        root_dir (Path): Directory to start from.
        conn (sqlite3.Connection): SQLite database connection.
        recursive (bool): Whether to scan recursively.
        level (Optional[int]): Maximum depth level for recursion.
        ctx (ScanContext): Scanning context for counters and output.
    """
    # Each pending item is (directory, depth); the root is at depth 1.
    pending: list[tuple[Path, int]] = [(root_dir, 1)]
    while pending:
        here, depth = pending.pop()
        parent = str(here)
        try:
            with os.scandir(here) as listing:
                dir_rows: list[tuple[str, str, float]] = []
                file_rows: list[tuple[str, str, int, float]] = []
                children: list[Path] = []
                for item in listing:
                    item_path = Path(item.path)
                    try:
                        if item.is_dir(follow_symlinks=False):
                            info = item.stat(follow_symlinks=False)
                            dir_rows.append((str(item_path), parent, info.st_mtime))
                            ctx.increment_dirs()
                            if recursive and (level is None or depth < level):
                                children.append(item_path)
                        elif item.is_file(follow_symlinks=False):
                            info = item.stat(follow_symlinks=False)
                            file_rows.append((parent, item.name, info.st_size, info.st_mtime))
                            ctx.increment_files()
                    except OSError as exc:
                        # A failing entry is logged and skipped so that its
                        # siblings are still processed.
                        logging.warning(
                            {
                                "action": "scan_entry_error",
                                "path": str(item_path),
                                "error": str(exc),
                            }
                        )
                insert_entries(conn, dir_rows, file_rows)
                # Push in reverse so siblings come off the stack in scandir
                # order.
                pending.extend((child, depth + 1) for child in reversed(children))
        except PermissionError as exc:
            print(f"\nPermission denied: {here}")
            logging.error({"action": "scan_error", "directory": parent, "error": str(exc)})
        except OSError as exc:
            logging.warning({"action": "scan_error", "directory": parent, "error": str(exc)})
125
+
126
+
127
def clear_database(conn: sqlite3.Connection) -> None:
    """Delete every row from the ``directories`` and ``files`` tables.

    The deletions are committed before returning.

    Args:
        conn (sqlite3.Connection): SQLite database connection.
    """
    cur = conn.cursor()
    statements = ("DELETE FROM directories", "DELETE FROM files")
    for statement in statements:
        cur.execute(statement)
    conn.commit()
137
+
138
+
139
def update_live_output(dir_count: int, file_count: int) -> None:
    """Rewrite the single-line scan progress display.

    The counts are printed with thousands separators; the line ends with a
    carriage return so the next update overwrites it, and stdout is flushed.

    Args:
        dir_count (int): Number of directories scanned.
        file_count (int): Number of files scanned.
    """
    status = f"Scanning directories: {dir_count:,}, files: {file_count:,}"
    print(status, end="\r", flush=True)
153
+
154
+
155
def insert_entries(
    conn: sqlite3.Connection,
    dir_entries: list[tuple[str, str, float]],
    file_entries: list[tuple[str, str, int, float]],
) -> None:
    """Store one batch of directory and file rows, then commit.

    Rows are written with ``INSERT OR IGNORE``. An empty batch issues no
    statement for its table, but the commit always runs.

    Args:
        conn (sqlite3.Connection): SQLite database connection.
        dir_entries (List[Tuple[str, str, float]]): List of directory entries.
        file_entries (List[Tuple[str, str, int, float]]): List of file entries.
    """
    cur = conn.cursor()
    # Directories first, then files; each pair is (rows, statement).
    batches = (
        (
            dir_entries,
            "INSERT OR IGNORE INTO directories (path, parent_path, mtime) VALUES (?, ?, ?)",
        ),
        (
            file_entries,
            "INSERT OR IGNORE INTO files (directory_path, name, size, mtime) VALUES (?, ?, ?, ?)",
        ),
    )
    for rows, statement in batches:
        if rows:
            cur.executemany(statement, rows)
    conn.commit()