pystou 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cleanup/__init__.py +0 -0
- cleanup/main.py +310 -0
- common/__init__.py +0 -0
- common/cli.py +37 -0
- common/cursor.py +98 -0
- common/errors.py +9 -0
- common/fs_walker.py +178 -0
- common/indexer.py +178 -0
- common/interrupt.py +26 -0
- common/logger.py +67 -0
- common/safe_extract.py +100 -0
- common/safe_ops.py +47 -0
- common/utils.py +559 -0
- common/validation.py +46 -0
- dedup_folders/__init__.py +0 -0
- dedup_folders/main.py +346 -0
- empty/__init__.py +1 -0
- empty/main.py +308 -0
- extract/__init__.py +0 -0
- extract/main.py +394 -0
- identify/__init__.py +1 -0
- identify/main.py +401 -0
- pystou/__init__.py +3 -0
- pystou/main.py +129 -0
- pystou-0.1.0.dist-info/METADATA +392 -0
- pystou-0.1.0.dist-info/RECORD +31 -0
- pystou-0.1.0.dist-info/WHEEL +4 -0
- pystou-0.1.0.dist-info/entry_points.txt +2 -0
- pystou-0.1.0.dist-info/licenses/LICENSE +7 -0
- stats/__init__.py +1 -0
- stats/main.py +327 -0
cleanup/__init__.py
ADDED
|
File without changes
|
cleanup/main.py
ADDED
|
@@ -0,0 +1,310 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""Cleanup subcommand for removing junk files from directories."""
|
|
3
|
+
|
|
4
|
+
import argparse
|
|
5
|
+
import logging
|
|
6
|
+
import os
|
|
7
|
+
import shutil
|
|
8
|
+
from pathlib import Path
|
|
9
|
+
from typing import Optional
|
|
10
|
+
|
|
11
|
+
from common.cli import add_common_arguments
|
|
12
|
+
from common.interrupt import scanning
|
|
13
|
+
from common.logger import log_configuration, setup_logging
|
|
14
|
+
from common.validation import validate_directory_or_exit
|
|
15
|
+
|
|
16
|
+
# Exact file names that are always treated as junk.
JUNK_FILES: set[str] = {
    "._.DS_Store",
    ".DS_Store",
    ".Spotlight-V100",
    ".Trashes",
    "Thumbs.db",
    "desktop.ini",
    "ehthumbs.db",
    "ehthumbs_vista.db",
}

# A file whose name starts with any of these prefixes is junk
# (macOS resource forks).
JUNK_PREFIXES: set[str] = {"._"}

# Exact directory names that are always treated as junk.
JUNK_DIRS: set[str] = {
    ".AppleDouble",
    ".LSOverride",
    ".TemporaryItems",
    ".fseventsd",
    "__MACOSX",
}
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
def add_cleanup_arguments(parser: argparse.ArgumentParser) -> None:
    """Register the shared options plus the cleanup-only options on a parser.

    Args:
        parser: ArgumentParser that receives the arguments.
    """
    # Shared options first, so cleanup-specific flags are listed after them.
    add_common_arguments(parser)

    include_options = {
        "type": str,
        "action": "append",
        "metavar": "PATTERN",
        "help": "Additional file/directory names to remove (can be used multiple times)",
    }
    parser.add_argument("--include", **include_options)

    list_only_options = {
        "action": "store_true",
        "help": "Only list junk files without removing them",
    }
    parser.add_argument("--list-only", **list_only_options)
|
|
62
|
+
|
|
63
|
+
|
|
64
|
+
def main(args: Optional[argparse.Namespace] = None) -> None:
    """Main entry point for cleanup: scan for junk, report it, optionally remove it.

    The run stops after the listing when ``--list-only`` or ``--dry-run`` is
    set; otherwise every reported item is removed.

    Args:
        args: Parsed arguments. If None, parses from command line.
    """
    if args is None:
        parser = argparse.ArgumentParser(description="Cleanup junk files script.")
        add_cleanup_arguments(parser)
        args = parser.parse_args()

    setup_logging("cleanup", args.log_dir)
    log_configuration(args)

    validate_directory_or_exit(args.directory)

    # Build the sets of names to match. --include is documented as accepting
    # file *and* directory names, so each extra name must be matched against
    # both kinds of entry (previously directories named via --include were
    # silently ignored).
    junk_files = JUNK_FILES.copy()
    junk_dirs = JUNK_DIRS.copy()
    if args.include:
        junk_files.update(args.include)
        junk_dirs.update(args.include)

    # Find junk files
    with scanning("scan"):
        junk_items = find_junk(args.directory, args.recursive, junk_files, junk_dirs)

    if not junk_items:
        print("No junk files found.")
        logging.info({"action": "no_junk_found"})
        return

    print(f"Found {len(junk_items)} junk item(s):")
    for item in junk_items:
        print(f" {item}")

    logging.info(
        {
            "action": "junk_found",
            "count": len(junk_items),
            "items": [str(i) for i in junk_items],
        }
    )

    # Listing-only and dry-run modes both stop before anything is deleted.
    if args.list_only:
        print("\n(Use without --list-only to remove)")
        return

    if args.dry_run:
        print("\nDry run: would remove the above items")
        logging.info({"action": "cleanup", "status": "dry_run"})
        return

    # Remove junk files
    with scanning("removal"):
        removed_count, skipped_count = remove_junk(junk_items)

    print(f"\nRemoved {removed_count}/{len(junk_items)} item(s)")
    if skipped_count > 0:
        print(f"Skipped {skipped_count} item(s) due to errors")
    logging.info(
        {
            "action": "cleanup_complete",
            "removed": removed_count,
            "skipped": skipped_count,
            "total": len(junk_items),
        }
    )
|
|
132
|
+
|
|
133
|
+
|
|
134
|
+
def find_junk(
    directory: str,
    recursive: bool,
    junk_files: set[str],
    junk_dirs: set[str],
) -> list[Path]:
    """Finds junk files and directories.

    Symlinks are never reported. In recursive mode a matched junk directory is
    reported once and not descended into.

    Args:
        directory: Directory to search.
        recursive: Whether to search recursively.
        junk_files: Set of junk file names.
        junk_dirs: Set of junk directory names.

    Returns:
        List of paths to junk items.
    """
    junk_items: list[Path] = []
    directory_path = Path(directory)
    scanned = 0

    if recursive:
        # followlinks=False prevents infinite loops from symlink cycles
        for root, dirs, files in os.walk(directory_path, followlinks=False):
            root_path = Path(root)
            scanned += 1

            # Progress indicator every 1000 directories
            if scanned % 1000 == 0:
                print(f"Scanned {scanned} directories...", end="\r")

            # Check for junk directories. Iterate over a copy because matched
            # names are removed from `dirs` to stop os.walk descending into them.
            for dir_name in dirs[:]:
                dir_path = root_path / dir_name
                # Skip symlinks to avoid issues
                if dir_path.is_symlink():
                    continue
                if dir_name in junk_dirs:
                    junk_items.append(dir_path)
                    dirs.remove(dir_name)  # Don't descend into junk dirs

            # Check for junk files
            for file_name in files:
                file_path = root_path / file_name
                # Skip symlinks
                if file_path.is_symlink():
                    continue
                if is_junk_file(file_name, junk_files):
                    junk_items.append(file_path)
    else:
        try:
            # The context manager closes the directory handle deterministically,
            # even if iteration stops early because of an exception.
            with os.scandir(directory_path) as entries:
                for entry in entries:
                    # Skip symlinks
                    if entry.is_symlink():
                        continue
                    if (entry.is_dir(follow_symlinks=False) and entry.name in junk_dirs) or (
                        entry.is_file(follow_symlinks=False)
                        and is_junk_file(entry.name, junk_files)
                    ):
                        junk_items.append(Path(entry.path))
        except PermissionError as e:
            print(f"Permission denied: {directory_path}")
            logging.warning({"action": "scan_error", "path": str(directory_path), "error": str(e)})

    if scanned >= 1000:
        print(f"Scanned {scanned} directories. ")  # Clear progress line

    return junk_items
|
|
201
|
+
|
|
202
|
+
|
|
203
|
+
def is_junk_file(filename: str, junk_files: set[str]) -> bool:
    """Decide whether a file name denotes a junk file.

    Args:
        filename: Name of the file.
        junk_files: Set of junk file names.

    Returns:
        True if the name is listed in ``junk_files`` or starts with one of
        the ``JUNK_PREFIXES`` (e.g. ``._`` files), False otherwise.
    """
    # An exact-name match is checked first; the prefix test is only the fallback.
    return filename in junk_files or filename.startswith(tuple(JUNK_PREFIXES))
|
|
218
|
+
|
|
219
|
+
|
|
220
|
+
def remove_junk(junk_items: list[Path]) -> tuple[int, int]:
    """Removes junk files and directories.

    Failures never abort the run: an item that is missing or cannot be
    removed is logged, counted as skipped, and the loop moves on.

    Args:
        junk_items: List of paths to remove.

    Returns:
        Tuple of (removed_count, skipped_count).
    """
    removed = 0
    skipped = 0
    total = len(junk_items)

    for i, item in enumerate(junk_items, 1):
        # Progress indicator
        if total > 10 and i % 10 == 0:
            print(f"Removing {i}/{total}...", end="\r")

        try:
            # Path.exists() follows symlinks, so a dangling symlink would look
            # "already deleted". Test for the link itself first so that it is
            # unlinked instead of skipped.
            is_link = item.is_symlink()

            if not is_link and not item.exists():
                # File was already deleted (race condition)
                logging.warning(
                    {
                        "action": "remove_junk",
                        "status": "already_deleted",
                        "path": str(item),
                    }
                )
                skipped += 1
                continue

            if is_link or not item.is_dir():
                # Plain files, and symlinks (never followed): remove the entry itself
                item.unlink()
            else:
                shutil.rmtree(item)

            removed += 1
            logging.info(
                {
                    "action": "remove_junk",
                    "status": "success",
                    "path": str(item),
                }
            )

        except FileNotFoundError:
            # Race condition: file deleted between check and removal
            logging.warning(
                {
                    "action": "remove_junk",
                    "status": "not_found",
                    "path": str(item),
                }
            )
            skipped += 1

        except PermissionError as e:
            print(f"Permission denied: {item}")
            logging.error(
                {
                    "action": "remove_junk",
                    "status": "permission_denied",
                    "path": str(item),
                    "error": str(e),
                }
            )
            skipped += 1

        except OSError as e:
            print(f"Error removing {item}: {e}")
            logging.error(
                {
                    "action": "remove_junk",
                    "status": "error",
                    "path": str(item),
                    "error": str(e),
                }
            )
            skipped += 1

    if total > 10:
        print(f"Removed {removed}/{total} items. ")  # Clear progress line

    return removed, skipped
|
|
307
|
+
|
|
308
|
+
|
|
309
|
+
if __name__ == "__main__":
|
|
310
|
+
main()
|
common/__init__.py
ADDED
|
File without changes
|
common/cli.py
ADDED
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
import argparse
|
|
2
|
+
|
|
3
|
+
|
|
4
|
+
def add_common_arguments(parser: argparse.ArgumentParser) -> None:
    """Register the options shared by every subcommand on ``parser``.

    Adds the optional positional ``directory`` plus the ``-r/--recursive``,
    ``-n/--dry-run``, ``--log-dir`` and ``--db-dir`` options.

    Args:
        parser: ArgumentParser to add arguments to.
    """
    add = parser.add_argument

    # Optional positional: falls back to the current directory when omitted.
    add(
        "directory",
        nargs="?",
        default=".",
        help="Directory to start from (default: current directory)",
    )

    # Boolean switches
    add("-r", "--recursive", action="store_true", help="Recursively process subdirectories")
    add("-n", "--dry-run", action="store_true", help="Perform a dry run (do not make any changes)")

    # Output locations
    add(
        "--log-dir",
        default=".",
        help="Directory to store log files (default: current directory)",
    )
    add(
        "--db-dir",
        default=".",
        help="Directory to store index database (default: current directory)",
    )
|
common/cursor.py
ADDED
|
@@ -0,0 +1,98 @@
|
|
|
1
|
+
"""Cursor utilities for terminal progress display."""
|
|
2
|
+
|
|
3
|
+
import atexit
|
|
4
|
+
import contextlib
|
|
5
|
+
import signal
|
|
6
|
+
import sys
|
|
7
|
+
from typing import Optional
|
|
8
|
+
|
|
9
|
+
# ANSI escape codes for cursor control
|
|
10
|
+
HIDE_CURSOR = "\033[?25l"
|
|
11
|
+
SHOW_CURSOR = "\033[?25h"
|
|
12
|
+
|
|
13
|
+
# Track cursor state
|
|
14
|
+
_cursor_hidden = False
|
|
15
|
+
_original_sigint: Optional[signal.Handlers] = None
|
|
16
|
+
_original_sigterm: Optional[signal.Handlers] = None
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
def hide_cursor() -> None:
    """Hide the terminal cursor and arrange for it to be restored on exit.

    Does nothing when the cursor is already hidden or stdout is not a
    terminal. Otherwise it writes the hide sequence, registers ``show_cursor``
    with ``atexit``, and replaces the SIGINT/SIGTERM handlers with
    ``_signal_handler`` after remembering the previous ones.
    """
    global _cursor_hidden, _original_sigint, _original_sigterm

    # Already hidden, or output is not a terminal: nothing to do.
    if _cursor_hidden or not sys.stdout.isatty():
        return

    sys.stdout.write(HIDE_CURSOR)
    sys.stdout.flush()
    _cursor_hidden = True

    # Normal interpreter exit restores the cursor.
    atexit.register(show_cursor)

    # Remember the current handlers so show_cursor() can put them back.
    _original_sigint = signal.getsignal(signal.SIGINT)
    _original_sigterm = signal.getsignal(signal.SIGTERM)

    for signum in (signal.SIGINT, signal.SIGTERM):
        signal.signal(signum, _signal_handler)
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
def show_cursor() -> None:
    """Show the terminal cursor again and undo what ``hide_cursor`` installed.

    Does nothing when the cursor is not currently marked as hidden. Failures
    while unregistering the ``atexit`` hook or restoring a signal handler are
    deliberately suppressed.
    """
    global _cursor_hidden, _original_sigint, _original_sigterm

    if not _cursor_hidden:
        return

    # The escape sequence is only emitted when stdout is a terminal.
    if sys.stdout.isatty():
        sys.stdout.write(SHOW_CURSOR)
        sys.stdout.flush()

    _cursor_hidden = False

    # Drop the exit hook; it is no longer needed.
    with contextlib.suppress(Exception):
        atexit.unregister(show_cursor)

    # Put back whichever handlers were saved, then forget them.
    if _original_sigint is not None:
        with contextlib.suppress(Exception):
            signal.signal(signal.SIGINT, _original_sigint)
        _original_sigint = None

    if _original_sigterm is not None:
        with contextlib.suppress(Exception):
            signal.signal(signal.SIGTERM, _original_sigterm)
        _original_sigterm = None
|
|
73
|
+
|
|
74
|
+
|
|
75
|
+
def _signal_handler(signum: int, frame) -> None:
    """Restore the cursor, then hand the signal to the handler that was replaced.

    Args:
        signum: Number of the signal being handled.
        frame: Frame object passed by the signal machinery; forwarded as-is.
    """
    # Look up the saved handler BEFORE show_cursor() runs, because
    # show_cursor() resets the saved handlers to None.
    saved = {
        signal.SIGINT: _original_sigint,
        signal.SIGTERM: _original_sigterm,
    }.get(signum)
    original = signal.SIG_DFL if saved is None else saved

    show_cursor()

    if original == signal.SIG_DFL:
        # Default behavior - raise KeyboardInterrupt for SIGINT,
        # otherwise exit with the conventional 128 + signal number.
        if signum == signal.SIGINT:
            raise KeyboardInterrupt
        sys.exit(128 + signum)

    # An ignored signal stays ignored; any other callable handler is invoked.
    if original != signal.SIG_IGN and callable(original):
        original(signum, frame)
|
common/errors.py
ADDED
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
"""Typed exceptions used to distinguish expected failures from bugs."""
|
|
2
|
+
|
|
3
|
+
|
|
4
|
+
class PystouError(Exception):
    """Root of the PyStou exception hierarchy for expected, explained errors."""
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
class InvalidDirectoryError(PystouError):
    """Signals that a target directory is missing or is not a directory."""
|
common/fs_walker.py
ADDED
|
@@ -0,0 +1,178 @@
|
|
|
1
|
+
import logging
|
|
2
|
+
import os
|
|
3
|
+
import sqlite3
|
|
4
|
+
from pathlib import Path
|
|
5
|
+
from typing import Optional
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
class ScanContext:
    """Counters for a scan, with throttled refreshes of the live progress output."""

    __slots__ = ("_last_update", "dir_count", "file_count", "update_interval")

    def __init__(self, update_interval: int = 100):
        """Start with all counters at zero.

        Args:
            update_interval: Number of newly counted entries between two
                refreshes of the live output.
        """
        self.update_interval = update_interval
        self.dir_count = 0
        self.file_count = 0
        # Combined dir + file total at the time of the last refresh.
        self._last_update = 0

    def increment_dirs(self) -> None:
        """Count one directory and refresh the output if it is due."""
        self.dir_count += 1
        self._maybe_update_output()

    def increment_files(self) -> None:
        """Count one file and refresh the output if it is due."""
        self.file_count += 1
        self._maybe_update_output()

    def _maybe_update_output(self) -> None:
        """Refresh the output once ``update_interval`` new entries have been counted."""
        seen = self.dir_count + self.file_count
        if seen - self._last_update < self.update_interval:
            return
        self._last_update = seen
        update_live_output(self.dir_count, self.file_count)

    def final_update(self) -> None:
        """Print the final counts and end the progress line."""
        update_live_output(self.dir_count, self.file_count)
        print()  # Newline after scanning complete
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
def collect_directories(
    conn: sqlite3.Connection,
    directory: str,
    recursive: bool,
    level: Optional[int] = None,
) -> None:
    """Rebuild the index: empty the database, then fill it from a filesystem scan.

    Args:
        conn (sqlite3.Connection): SQLite database connection.
        directory (str): Directory to start scanning from.
        recursive (bool): Whether to scan directories recursively.
        level (Optional[int]): Maximum depth level for recursion (default: unlimited).
    """
    # Existing rows are dropped first, so the index reflects only this scan.
    clear_database(conn)

    progress = ScanContext(update_interval=100)
    start = Path(directory)
    scan_tree(start, conn, recursive, level, progress)
    progress.final_update()
|
|
56
|
+
|
|
57
|
+
|
|
58
|
+
def scan_tree(
    root_dir: Path,
    conn: sqlite3.Connection,
    recursive: bool,
    level: Optional[int],
    ctx: ScanContext,
) -> None:
    """Walk a directory tree with an explicit stack and record what is found.

    Using a stack instead of recursion avoids RecursionError on deep trees.
    Entries are classified without following symlinks. Each directory's
    entries are handed to ``insert_entries`` as one batch.

    Args:
        root_dir (Path): Directory to start from.
        conn (sqlite3.Connection): SQLite database connection.
        recursive (bool): Whether to scan recursively.
        level (Optional[int]): Maximum depth level for recursion.
        ctx (ScanContext): Scanning context for counters and output.
    """
    # Each item is (directory, depth); the starting directory is depth 1.
    pending: list[tuple[Path, int]] = [(root_dir, 1)]

    while pending:
        folder, depth = pending.pop()
        # Children are queued only when recursing and below the depth limit.
        descend = recursive and (level is None or depth < level)
        parent = str(folder)

        try:
            with os.scandir(folder) as listing:
                found_dirs: list[tuple[str, str, float]] = []
                found_files: list[tuple[str, str, int, float]] = []
                children: list[Path] = []

                for item in listing:
                    item_path = Path(item.path)
                    try:
                        if item.is_dir(follow_symlinks=False):
                            info = item.stat(follow_symlinks=False)
                            found_dirs.append((str(item_path), parent, info.st_mtime))
                            ctx.increment_dirs()
                            if descend:
                                children.append(item_path)
                        elif item.is_file(follow_symlinks=False):
                            info = item.stat(follow_symlinks=False)
                            found_files.append((parent, item.name, info.st_size, info.st_mtime))
                            ctx.increment_files()
                    except OSError as e:
                        # One bad entry must not abort its siblings.
                        logging.warning(
                            {
                                "action": "scan_entry_error",
                                "path": str(item_path),
                                "error": str(e),
                            }
                        )

                insert_entries(conn, found_dirs, found_files)

                # Push in reverse so siblings are popped in scandir order
                # (matches the original recursive traversal).
                pending.extend((child, depth + 1) for child in reversed(children))
        except PermissionError as e:
            print(f"\nPermission denied: {folder}")
            logging.error({"action": "scan_error", "directory": str(folder), "error": str(e)})
        except OSError as e:
            logging.warning(
                {"action": "scan_error", "directory": str(folder), "error": str(e)}
            )
|
|
125
|
+
|
|
126
|
+
|
|
127
|
+
def clear_database(conn: sqlite3.Connection) -> None:
    """Delete every row from the ``directories`` and ``files`` tables and commit.

    Args:
        conn (sqlite3.Connection): SQLite database connection.
    """
    cur = conn.cursor()
    for statement in ("DELETE FROM directories", "DELETE FROM files"):
        cur.execute(statement)
    conn.commit()
|
|
137
|
+
|
|
138
|
+
|
|
139
|
+
def update_live_output(dir_count: int, file_count: int) -> None:
    """Rewrite the live progress line with the current counts.

    The line ends with a carriage return instead of a newline, so the next
    call overwrites it in place; counts are shown with thousands separators.

    Args:
        dir_count (int): Number of directories scanned.
        file_count (int): Number of files scanned.
    """
    status = "Scanning directories: {:,}, files: {:,}".format(dir_count, file_count)
    print(status, end="\r", flush=True)
|
|
153
|
+
|
|
154
|
+
|
|
155
|
+
def insert_entries(
    conn: sqlite3.Connection,
    dir_entries: list[tuple[str, str, float]],
    file_entries: list[tuple[str, str, int, float]],
) -> None:
    """Bulk-insert scanned directory and file rows, then commit.

    Rows go in with ``INSERT OR IGNORE``, so a row that conflicts with an
    existing one is skipped rather than raising. Empty batches are not sent
    to the database, but the commit always happens.

    Args:
        conn (sqlite3.Connection): SQLite database connection.
        dir_entries (List[Tuple[str, str, float]]): Rows of
            (path, parent_path, mtime) for the directories table.
        file_entries (List[Tuple[str, str, int, float]]): Rows of
            (directory_path, name, size, mtime) for the files table.
    """
    batches = (
        (
            "INSERT OR IGNORE INTO directories (path, parent_path, mtime) VALUES (?, ?, ?)",
            dir_entries,
        ),
        (
            "INSERT OR IGNORE INTO files (directory_path, name, size, mtime) VALUES (?, ?, ?, ?)",
            file_entries,
        ),
    )

    cur = conn.cursor()
    for statement, rows in batches:
        if rows:
            cur.executemany(statement, rows)
    conn.commit()
|