PyHardLinkBackup 1.8.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- PyHardLinkBackup/__init__.py +7 -0
- PyHardLinkBackup/__main__.py +10 -0
- PyHardLinkBackup/backup.py +297 -0
- PyHardLinkBackup/cli_app/__init__.py +41 -0
- PyHardLinkBackup/cli_app/phlb.py +136 -0
- PyHardLinkBackup/cli_dev/__init__.py +70 -0
- PyHardLinkBackup/cli_dev/__main__.py +10 -0
- PyHardLinkBackup/cli_dev/benchmark.py +138 -0
- PyHardLinkBackup/cli_dev/code_style.py +12 -0
- PyHardLinkBackup/cli_dev/debugging.py +47 -0
- PyHardLinkBackup/cli_dev/packaging.py +62 -0
- PyHardLinkBackup/cli_dev/shell_completion.py +23 -0
- PyHardLinkBackup/cli_dev/testing.py +52 -0
- PyHardLinkBackup/cli_dev/update_readme_history.py +33 -0
- PyHardLinkBackup/compare_backup.py +259 -0
- PyHardLinkBackup/constants.py +18 -0
- PyHardLinkBackup/logging_setup.py +124 -0
- PyHardLinkBackup/rebuild_databases.py +217 -0
- PyHardLinkBackup/tests/__init__.py +36 -0
- PyHardLinkBackup/tests/test_backup.py +1167 -0
- PyHardLinkBackup/tests/test_compare_backup.py +167 -0
- PyHardLinkBackup/tests/test_doc_write.py +26 -0
- PyHardLinkBackup/tests/test_doctests.py +10 -0
- PyHardLinkBackup/tests/test_project_setup.py +46 -0
- PyHardLinkBackup/tests/test_readme.py +75 -0
- PyHardLinkBackup/tests/test_readme_history.py +9 -0
- PyHardLinkBackup/tests/test_rebuild_database.py +266 -0
- PyHardLinkBackup/utilities/__init__.py +0 -0
- PyHardLinkBackup/utilities/file_hash_database.py +62 -0
- PyHardLinkBackup/utilities/file_size_database.py +46 -0
- PyHardLinkBackup/utilities/filesystem.py +257 -0
- PyHardLinkBackup/utilities/humanize.py +39 -0
- PyHardLinkBackup/utilities/rich_utils.py +237 -0
- PyHardLinkBackup/utilities/sha256sums.py +61 -0
- PyHardLinkBackup/utilities/tee.py +40 -0
- PyHardLinkBackup/utilities/tests/__init__.py +0 -0
- PyHardLinkBackup/utilities/tests/test_file_hash_database.py +153 -0
- PyHardLinkBackup/utilities/tests/test_file_size_database.py +151 -0
- PyHardLinkBackup/utilities/tests/test_filesystem.py +167 -0
- PyHardLinkBackup/utilities/tests/unittest_utilities.py +78 -0
- PyHardLinkBackup/utilities/tyro_cli_shared_args.py +29 -0
- pyhardlinkbackup-1.8.1.dist-info/METADATA +700 -0
- pyhardlinkbackup-1.8.1.dist-info/RECORD +45 -0
- pyhardlinkbackup-1.8.1.dist-info/WHEEL +4 -0
- pyhardlinkbackup-1.8.1.dist-info/entry_points.txt +2 -0
|
@@ -0,0 +1,124 @@
|
|
|
1
|
+
import logging
|
|
2
|
+
import sys
|
|
3
|
+
from pathlib import Path
|
|
4
|
+
from typing import Annotated, Literal
|
|
5
|
+
|
|
6
|
+
import tyro
|
|
7
|
+
from bx_py_utils.path import assert_is_dir
|
|
8
|
+
from rich import (
|
|
9
|
+
get_console,
|
|
10
|
+
print, # noqa
|
|
11
|
+
)
|
|
12
|
+
from rich.logging import RichHandler
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
logger = logging.getLogger(__name__)

# The only log level names accepted on the command line (for console AND file logging):
LogLevelLiteral = Literal['debug', 'info', 'warning', 'error']


# tyro CLI argument type for the console log level:
TyroConsoleLogLevelArgType = Annotated[
    LogLevelLiteral,
    tyro.conf.arg(
        help='Log level for console logging.',
    ),
]
DEFAULT_CONSOLE_LOG_LEVEL: TyroConsoleLogLevelArgType = 'warning'


# tyro CLI argument type for the log file level:
TyroLogFileLevelArgType = Annotated[
    LogLevelLiteral,
    tyro.conf.arg(
        help='Log level for the log file',
    ),
]
DEFAULT_LOG_FILE_LEVEL: TyroLogFileLevelArgType = 'info'
|
+
|
|
37
|
+
|
|
38
|
+
def log_level_name2int(level_name: str) -> int:
|
|
39
|
+
level_name = level_name.upper()
|
|
40
|
+
level_mapping = logging.getLevelNamesMapping()
|
|
41
|
+
try:
|
|
42
|
+
return level_mapping[level_name]
|
|
43
|
+
except KeyError as err:
|
|
44
|
+
raise ValueError(f'Invalid log level name: {level_name}') from err
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
# Shared rich console instance, used by LoggingManager and the RichHandler below:
console = get_console()
|
|
50
|
+
class LoggingManager:
    """
    Set up console logging (via rich) on creation; a log file can be added later
    via start_file_logging(). Also installs sys.excepthook so unhandled
    exceptions end up in the logs.
    """

    def __init__(
        self,
        *,
        console_level: TyroConsoleLogLevelArgType,
        file_level: TyroLogFileLevelArgType,
    ):
        self.console_level_name = console_level
        self.console_level: int = log_level_name2int(console_level)
        self.file_level_name = file_level
        self.file_level: int = log_level_name2int(file_level)

        # The root logger must pass records down to the most verbose handler:
        self.lowest_level = min(self.console_level, self.file_level)

        # Bugfix: compare the numeric level, not the level name string
        # (the old `console_level == logging.DEBUG` compared 'debug' against 10 -> always False):
        if self.console_level == logging.DEBUG:
            # Include the logger name in debug mode to see where messages come from:
            log_format = '(%(name)s) %(message)s'
        else:
            log_format = '%(message)s'

        console.print(
            # Bugfix: close the markup tag (`[bold]` was opened twice, never closed):
            f'(Set [bold]console[/bold] log level: [cyan]{self.console_level_name}[/cyan])',
            justify='right',
        )
        handler = RichHandler(console=console, omit_repeated_times=False)
        handler.setLevel(self.console_level)
        logging.basicConfig(
            level=self.lowest_level,
            format=log_format,
            datefmt='[%x %X.%f]',
            handlers=[handler],
            force=True,  # Replace any previously configured handlers
        )
        sys.excepthook = self.log_unhandled_exception

    def start_file_logging(self, log_file: Path):
        """
        Attach a FileHandler (append mode, UTF-8) for *log_file* to the root logger.
        The parent directory must already exist.
        """
        console.print(
            f'(initialize log file [bold]{log_file}[/bold] with level: [cyan]{self.file_level_name}[/cyan])',
            justify='right',
        )

        assert_is_dir(log_file.parent)

        root_logger = logging.getLogger()

        file_handler = logging.FileHandler(log_file, mode='a', encoding='utf-8')
        file_handler.setLevel(self.file_level)

        formatter = logging.Formatter(
            '%(asctime)s - %(name)s - %(levelname)s - %(message)s',
            datefmt='%Y-%m-%d %H:%M:%S',
        )
        file_handler.setFormatter(formatter)

        root_logger.addHandler(file_handler)

    def log_unhandled_exception(self, exc_type, exc_value, exc_traceback):
        """
        sys.excepthook replacement: log unhandled exceptions; pass
        KeyboardInterrupt through to the default hook after a short info message.
        """
        if issubclass(exc_type, KeyboardInterrupt):
            logger.info('Program interrupted by user (KeyboardInterrupt). Exiting...')
            sys.__excepthook__(exc_type, exc_value, exc_traceback)
        else:
            logger.exception(
                'Unhandled exception occurred:',
                exc_info=(exc_type, exc_value, exc_traceback),
            )
|
115
|
+
|
|
116
|
+
class NoopLoggingManager(LoggingManager):
    """
    Only for tests: A logging manager that does nothing.
    """

    def __init__(self, *args, **kwargs):
        """Deliberately skip LoggingManager.__init__(): no handlers, no sys.excepthook change."""

    def start_file_logging(self, log_file: Path):
        """Ignore the request — no log file is created in tests."""
|
@@ -0,0 +1,217 @@
|
|
|
1
|
+
import dataclasses
|
|
2
|
+
import datetime
|
|
3
|
+
import logging
|
|
4
|
+
import os
|
|
5
|
+
import sys
|
|
6
|
+
import time
|
|
7
|
+
from pathlib import Path
|
|
8
|
+
|
|
9
|
+
from PyHardLinkBackup.logging_setup import LoggingManager
|
|
10
|
+
from PyHardLinkBackup.utilities.file_hash_database import FileHashDatabase
|
|
11
|
+
from PyHardLinkBackup.utilities.file_size_database import FileSizeDatabase
|
|
12
|
+
from PyHardLinkBackup.utilities.filesystem import hash_file, humanized_fs_scan, iter_scandir_files
|
|
13
|
+
from PyHardLinkBackup.utilities.humanize import PrintTimingContextManager, human_filesize
|
|
14
|
+
from PyHardLinkBackup.utilities.rich_utils import DisplayFileTreeProgress
|
|
15
|
+
from PyHardLinkBackup.utilities.sha256sums import check_sha256sums, store_hash
|
|
16
|
+
from PyHardLinkBackup.utilities.tee import TeeStdoutContext
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
logger = logging.getLogger(__name__)


@dataclasses.dataclass
class RebuildResult:
    """Counters collected while rebuilding the deduplication databases."""

    # All processed files / bytes (used as "completed" values in the progress display):
    process_count: int = 0
    process_size: int = 0
    #
    # New entries added to the file size / file hash databases:
    added_size_count: int = 0
    added_hash_count: int = 0
    #
    # Files that raised an exception during processing (see log for details):
    error_count: int = 0
    #
    # SHA256SUMS verification outcomes (verified / mismatch / not found -> newly stored):
    hash_verified_count: int = 0
    hash_mismatch_count: int = 0
    hash_not_found_count: int = 0
    #
    # Hard link statistics: distinct inodes seen and files skipped as hard link duplicates:
    unique_inode_count: int = 0
    skip_by_inode_count: int = 0
|
+
|
|
40
|
+
def rebuild_one_file(
    *,
    backup_root: Path,
    entry: os.DirEntry,
    size_db: FileSizeDatabase,
    hash_db: FileHashDatabase,
    seen_inodes: set,
    skip_same_inode: bool,
    rebuild_result: RebuildResult,
    progress: DisplayFileTreeProgress,
):
    """
    Process one scandir entry: feed the size/hash "databases" and verify (or
    newly store) the SHA256SUMS entry for the file.

    All outcomes are counted on the shared *rebuild_result* instance.
    *seen_inodes* is updated in place so hard linked duplicates can be
    skipped when *skip_same_inode* is set.
    """
    inode = entry.inode()
    if inode not in seen_inodes:
        seen_inodes.add(inode)
    else:
        # Same inode seen before -> this entry is a hard link to an already processed file.
        if skip_same_inode:
            rebuild_result.skip_by_inode_count += 1
            # Update counters used in progress display:
            rebuild_result.process_size += entry.stat().st_size
            rebuild_result.process_count += 1
            return

    file_path = Path(entry.path)

    # We should ignore all files in the root backup directory itself
    # e.g.: Our *-summary.txt and *.log files
    if file_path.parent == backup_root:
        return

    rebuild_result.process_count += 1

    if entry.name == 'SHA256SUMS':
        # Skip existing SHA256SUMS files
        return

    size = entry.stat().st_size
    rebuild_result.process_size += size
    if size < size_db.MIN_SIZE:
        # Small files will never deduplicate, skip them
        return

    file_hash = hash_file(file_path, progress=progress, total_size=size)

    if size not in size_db:
        size_db.add(size)
        rebuild_result.added_size_count += 1

    if file_hash not in hash_db:
        hash_db[file_hash] = file_path
        rebuild_result.added_hash_count += 1

    # We have calculated the current hash of the file,
    # let's check if we can verify it, too.
    # (A redundant `file_path = Path(entry.path)` re-assignment was removed here —
    # `file_path` is unchanged since it was built above.)
    compare_result = check_sha256sums(
        file_path=file_path,
        file_hash=file_hash,
    )
    if compare_result is True:
        rebuild_result.hash_verified_count += 1
    elif compare_result is False:
        rebuild_result.hash_mismatch_count += 1
    elif compare_result is None:
        # No stored hash found -> remember it and store the freshly calculated one:
        rebuild_result.hash_not_found_count += 1
        store_hash(
            file_path=file_path,
            file_hash=file_hash,
        )
|
109
|
+
|
|
110
|
+
def rebuild(
    backup_root: Path,
    skip_same_inode: bool,
    log_manager: LoggingManager,
) -> RebuildResult:
    """
    Rebuild the deduplication "databases" (file size + file hash) by scanning an
    existing backup tree. Starts file logging and writes a summary text file into
    *backup_root*; returns the collected RebuildResult counters.

    Exits the process via sys.exit(1) when *backup_root* does not exist or does
    not contain the hidden ".phlb" configuration directory.
    """
    backup_root = backup_root.resolve()
    if not backup_root.is_dir():
        print(f'Error: Backup directory "{backup_root}" does not exist!')
        sys.exit(1)

    # The hidden ".phlb" directory marks a valid backup root and holds the databases:
    phlb_conf_dir = backup_root / '.phlb'
    if not phlb_conf_dir.is_dir():
        print(
            f'Error: Backup directory "{backup_root}" seems to be wrong:'
            f' Our hidden ".phlb" configuration directory is missing!'
        )
        sys.exit(1)

    timestamp = datetime.datetime.now().strftime('%Y-%m-%d-%H%M%S')
    log_manager.start_file_logging(log_file=backup_root / f'{timestamp}-rebuild.log')

    # First pass: count files/bytes so the progress display gets correct totals:
    with PrintTimingContextManager('Filesystem scan completed in'):
        file_count, total_size = humanized_fs_scan(
            path=backup_root,
            one_file_system=False,
            src_device_id=None,
            excludes={'.phlb'},
        )

    # We should ignore all files in the root backup directory itself
    # e.g.: Our *-summary.txt and *.log files
    for file in backup_root.iterdir():
        if file.is_file():
            file_count -= 1
            total_size -= file.stat().st_size

    with DisplayFileTreeProgress(
        description=f'Rebuild {backup_root}...',
        total_file_count=file_count,
        total_size=total_size,
    ) as progress:
        # "Databases" for deduplication
        size_db = FileSizeDatabase(phlb_conf_dir)
        hash_db = FileHashDatabase(backup_root, phlb_conf_dir)

        # Inodes already visited -> lets rebuild_one_file() detect hard link duplicates:
        seen_inodes = set()

        rebuild_result = RebuildResult()

        # Throttle progress updates to at most every 0.5 seconds:
        next_update = 0
        for entry in iter_scandir_files(
            path=backup_root,
            one_file_system=False,
            src_device_id=None,
            excludes={'.phlb'},
        ):
            try:
                rebuild_one_file(
                    backup_root=backup_root,
                    entry=entry,
                    size_db=size_db,
                    hash_db=hash_db,
                    seen_inodes=seen_inodes,
                    skip_same_inode=skip_same_inode,
                    rebuild_result=rebuild_result,
                    progress=progress,
                )
            except Exception as err:
                # NOTE(review): message says "Backup" but this is the rebuild loop — looks like copy/paste.
                logger.exception(f'Backup {entry.path} {err.__class__.__name__}: {err}')
                rebuild_result.error_count += 1
            else:
                now = time.monotonic()
                if now >= next_update:
                    progress.update(
                        completed_file_count=rebuild_result.process_count, completed_size=rebuild_result.process_size
                    )
                    next_update = now + 0.5

        # Finalize progress indicator values:
        progress.update(completed_file_count=rebuild_result.process_count, completed_size=rebuild_result.process_size)

    rebuild_result.unique_inode_count = len(seen_inodes)

    # Tee the summary prints to stdout AND into the summary file:
    summary_file = backup_root / f'{timestamp}-rebuild-summary.txt'
    with TeeStdoutContext(summary_file):
        print(f'\nRebuild "{backup_root}" completed:')
        print(f' Total files processed: {rebuild_result.process_count}')
        print(f' Total size processed: {human_filesize(rebuild_result.process_size)}')

        print(f' Unique inodes count: {rebuild_result.unique_inode_count}')
        print(f' Skipped files by inode: {rebuild_result.skip_by_inode_count}')

        print(f' Added file size information entries: {rebuild_result.added_size_count}')
        print(f' Added file hash entries: {rebuild_result.added_hash_count}')

        if rebuild_result.error_count > 0:
            print(f' Errors during rebuild: {rebuild_result.error_count} (see log for details)')

        print('\nSHA256SUMS verification results:')
        print(f' Successfully verified files: {rebuild_result.hash_verified_count}')
        print(f' File hash mismatches: {rebuild_result.hash_mismatch_count}')
        print(f' File hashes not found, newly stored: {rebuild_result.hash_not_found_count}')

        print()

    logger.info('Rebuild completed. Summary created: %s', summary_file)

    return rebuild_result
|
@@ -0,0 +1,36 @@
|
|
|
1
|
+
import os
|
|
2
|
+
import unittest.util
|
|
3
|
+
from pathlib import Path
|
|
4
|
+
|
|
5
|
+
from bx_py_utils.test_utils.deny_requests import deny_any_real_request
|
|
6
|
+
from cli_base.cli_tools.verbosity import MAX_LOG_LEVEL, setup_logging
|
|
7
|
+
from rich import print # noqa
|
|
8
|
+
from typeguard import install_import_hook
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
# Check type annotations via typeguard in all tests:
# NOTE: module-level side effect — the hook is installed when this test package is imported.
install_import_hook(packages=('PyHardLinkBackup',))
|
|
14
|
+
|
|
15
|
+
def pre_configure_tests() -> None:
    """
    One-time test environment setup, called via the unittest "load_tests protocol" hook below.
    """
    print(f'Configure unittests via "load_tests Protocol" from {Path(__file__).relative_to(Path.cwd())}')

    # Hacky way to display more "assert"-Context in failing tests:
    # (patches private unittest.util attributes — may break on future Python versions)
    _MIN_MAX_DIFF = unittest.util._MAX_LENGTH - unittest.util._MIN_DIFF_LEN
    unittest.util._MAX_LENGTH = int(os.environ.get('UNITTEST_MAX_LENGTH', 2000))
    unittest.util._MIN_DIFF_LEN = unittest.util._MAX_LENGTH - _MIN_MAX_DIFF

    # Deny any real request via socket/urllib3, because tests should mock all requests:
    deny_any_real_request()

    # Display DEBUG logs in tests:
    setup_logging(verbosity=MAX_LOG_LEVEL)
|
|
29
|
+
|
|
30
|
+
def load_tests(loader, tests, pattern):
    """
    Use unittest "load_tests Protocol" as a hook to setup test environment before running tests.
    https://docs.python.org/3/library/unittest.html#load-tests-protocol
    """
    pre_configure_tests()
    start_dir = Path(__file__).parent
    return loader.discover(start_dir=start_dir, pattern=pattern)