PyHardLinkBackup 1.5.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- PyHardLinkBackup/__init__.py +7 -0
- PyHardLinkBackup/__main__.py +10 -0
- PyHardLinkBackup/backup.py +261 -0
- PyHardLinkBackup/cli_app/__init__.py +41 -0
- PyHardLinkBackup/cli_app/phlb.py +123 -0
- PyHardLinkBackup/cli_dev/__init__.py +70 -0
- PyHardLinkBackup/cli_dev/benchmark.py +138 -0
- PyHardLinkBackup/cli_dev/code_style.py +12 -0
- PyHardLinkBackup/cli_dev/packaging.py +65 -0
- PyHardLinkBackup/cli_dev/shell_completion.py +23 -0
- PyHardLinkBackup/cli_dev/testing.py +52 -0
- PyHardLinkBackup/cli_dev/update_readme_history.py +33 -0
- PyHardLinkBackup/compare_backup.py +212 -0
- PyHardLinkBackup/constants.py +16 -0
- PyHardLinkBackup/logging_setup.py +124 -0
- PyHardLinkBackup/rebuild_databases.py +176 -0
- PyHardLinkBackup/tests/__init__.py +36 -0
- PyHardLinkBackup/tests/test_backup.py +628 -0
- PyHardLinkBackup/tests/test_compare_backup.py +86 -0
- PyHardLinkBackup/tests/test_doc_write.py +26 -0
- PyHardLinkBackup/tests/test_doctests.py +10 -0
- PyHardLinkBackup/tests/test_project_setup.py +46 -0
- PyHardLinkBackup/tests/test_readme.py +75 -0
- PyHardLinkBackup/tests/test_readme_history.py +9 -0
- PyHardLinkBackup/tests/test_rebuild_database.py +224 -0
- PyHardLinkBackup/utilities/__init__.py +0 -0
- PyHardLinkBackup/utilities/file_hash_database.py +62 -0
- PyHardLinkBackup/utilities/file_size_database.py +46 -0
- PyHardLinkBackup/utilities/filesystem.py +158 -0
- PyHardLinkBackup/utilities/humanize.py +39 -0
- PyHardLinkBackup/utilities/rich_utils.py +99 -0
- PyHardLinkBackup/utilities/sha256sums.py +61 -0
- PyHardLinkBackup/utilities/tee.py +40 -0
- PyHardLinkBackup/utilities/tests/__init__.py +0 -0
- PyHardLinkBackup/utilities/tests/test_file_hash_database.py +143 -0
- PyHardLinkBackup/utilities/tests/test_file_size_database.py +138 -0
- PyHardLinkBackup/utilities/tests/test_filesystem.py +126 -0
- PyHardLinkBackup/utilities/tyro_cli_shared_args.py +12 -0
- pyhardlinkbackup-1.5.0.dist-info/METADATA +600 -0
- pyhardlinkbackup-1.5.0.dist-info/RECORD +42 -0
- pyhardlinkbackup-1.5.0.dist-info/WHEEL +4 -0
- pyhardlinkbackup-1.5.0.dist-info/entry_points.txt +3 -0
|
@@ -0,0 +1,176 @@
|
|
|
1
|
+
import dataclasses
|
|
2
|
+
import datetime
|
|
3
|
+
import logging
|
|
4
|
+
import os
|
|
5
|
+
import sys
|
|
6
|
+
import time
|
|
7
|
+
from pathlib import Path
|
|
8
|
+
|
|
9
|
+
from PyHardLinkBackup.logging_setup import LoggingManager
|
|
10
|
+
from PyHardLinkBackup.utilities.file_hash_database import FileHashDatabase
|
|
11
|
+
from PyHardLinkBackup.utilities.file_size_database import FileSizeDatabase
|
|
12
|
+
from PyHardLinkBackup.utilities.filesystem import hash_file, humanized_fs_scan, iter_scandir_files
|
|
13
|
+
from PyHardLinkBackup.utilities.humanize import PrintTimingContextManager, human_filesize
|
|
14
|
+
from PyHardLinkBackup.utilities.rich_utils import DisplayFileTreeProgress
|
|
15
|
+
from PyHardLinkBackup.utilities.sha256sums import check_sha256sums, store_hash
|
|
16
|
+
from PyHardLinkBackup.utilities.tee import TeeStdoutContext
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
# Module-level logger, named after this module via __name__.
logger = logging.getLogger(__name__)
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
@dataclasses.dataclass
class RebuildResult:
    """
    Counters collected while the databases of one backup root are rebuilt.

    Every counter starts at zero and is incremented in place during the rebuild.
    """

    # Number of files handled and the sum of their sizes (st_size):
    process_count: int = 0
    process_size: int = 0

    # New entries added to the file size / file hash "databases":
    added_size_count: int = 0
    added_hash_count: int = 0

    # Files whose processing raised an exception:
    error_count: int = 0

    # Outcome of comparing each calculated hash with its SHA256SUMS entry:
    hash_verified_count: int = 0
    hash_mismatch_count: int = 0
    hash_not_found_count: int = 0
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
def rebuild_one_file(
    *,
    backup_root: Path,
    entry: os.DirEntry,
    size_db: FileSizeDatabase,
    hash_db: FileHashDatabase,
    rebuild_result: RebuildResult,
) -> None:
    """
    Register one file in the deduplication databases and verify its SHA256 hash.

    Files located directly in ``backup_root`` are ignored without being counted.
    Every other file increments ``rebuild_result.process_count``.
    ``SHA256SUMS`` files are counted, but neither sized nor hashed.
    Files smaller than ``size_db.MIN_SIZE`` are counted and sized, but not hashed.

    For all remaining files the size and the hash are added to ``size_db`` / ``hash_db``
    if they are missing there, and the hash is compared via ``check_sha256sums()``.
    A hash that has no stored entry yet is written with ``store_hash()``.

    All counters are updated in place on ``rebuild_result``; nothing is returned.
    """
    file_path = Path(entry.path)

    # We should ignore all files in the root backup directory itself
    # e.g.: Our *-summary.txt and *.log files
    if file_path.parent == backup_root:
        return

    rebuild_result.process_count += 1

    if entry.name == 'SHA256SUMS':
        # Skip existing SHA256SUMS files
        return

    size = entry.stat().st_size
    rebuild_result.process_size += size

    if size < size_db.MIN_SIZE:
        # Small files will never deduplicate, skip them
        return

    file_hash = hash_file(file_path)

    if size not in size_db:
        size_db.add(size)
        rebuild_result.added_size_count += 1

    if file_hash not in hash_db:
        hash_db[file_hash] = file_path
        rebuild_result.added_hash_count += 1

    # We have calculated the current hash of the file,
    # Let's check if we can verify it, too:
    compare_result = check_sha256sums(
        file_path=file_path,
        file_hash=file_hash,
    )
    if compare_result is True:
        rebuild_result.hash_verified_count += 1
    elif compare_result is False:
        rebuild_result.hash_mismatch_count += 1
    elif compare_result is None:
        # No stored hash for this file yet: count it and store the fresh one.
        rebuild_result.hash_not_found_count += 1
        store_hash(
            file_path=file_path,
            file_hash=file_hash,
        )
|
|
92
|
+
|
|
93
|
+
|
|
94
|
+
def rebuild(
    backup_root: Path,
    log_manager: LoggingManager,
) -> RebuildResult:
    """
    Rebuild the file size and file hash databases of an existing backup directory.

    The process exits with status 1 if ``backup_root`` is not a directory
    or if it does not contain the hidden ".phlb" configuration directory.

    A "<timestamp>-rebuild.log" and a "<timestamp>-rebuild-summary.txt" file
    are created in ``backup_root``. Errors of single files are logged and counted,
    they do not abort the rebuild.

    Returns the collected counters as ``RebuildResult``.
    """
    backup_root = backup_root.resolve()
    if not backup_root.is_dir():
        print(f'Error: Backup directory "{backup_root}" does not exist!')
        sys.exit(1)

    phlb_conf_dir = backup_root / '.phlb'
    if not phlb_conf_dir.is_dir():
        print(
            f'Error: Backup directory "{backup_root}" seems to be wrong:'
            f' Our hidden ".phlb" configuration directory is missing!'
        )
        sys.exit(1)

    timestamp = datetime.datetime.now().strftime('%Y-%m-%d-%H%M%S')
    log_file = backup_root / f'{timestamp}-rebuild.log'
    log_manager.start_file_logging(log_file=log_file)

    with PrintTimingContextManager('Filesystem scan completed in'):
        file_count, total_size = humanized_fs_scan(backup_root, excludes={'.phlb'})

    # Files in the root backup directory itself (e.g.: our *-summary.txt and *.log files)
    # are skipped by the rebuild, so remove them from the expected totals:
    for root_item in backup_root.iterdir():
        if not root_item.is_file():
            continue
        file_count -= 1
        total_size -= root_item.stat().st_size

    with DisplayFileTreeProgress(file_count, total_size) as progress:
        # "Databases" for deduplication
        size_db = FileSizeDatabase(phlb_conf_dir)
        hash_db = FileHashDatabase(backup_root, phlb_conf_dir)

        rebuild_result = RebuildResult()

        def push_progress() -> None:
            # Send the current counters to the progress display.
            progress.update(
                completed_file_count=rebuild_result.process_count,
                completed_size=rebuild_result.process_size,
            )

        next_update = 0
        for entry in iter_scandir_files(backup_root, excludes={'.phlb'}):
            try:
                rebuild_one_file(
                    backup_root=backup_root,
                    entry=entry,
                    size_db=size_db,
                    hash_db=hash_db,
                    rebuild_result=rebuild_result,
                )
            except Exception as err:
                logger.exception(f'Backup {entry.path} {err.__class__.__name__}: {err}')
                rebuild_result.error_count += 1
                continue

            # Refresh the display at most every 0.5 seconds:
            now = time.monotonic()
            if now >= next_update:
                push_progress()
                next_update = now + 0.5

        # Finalize progress indicator values:
        push_progress()

    summary_lines = [
        f'\nRebuild "{backup_root}" completed:',
        f' Total files processed: {rebuild_result.process_count}',
        f' Total size processed: {human_filesize(rebuild_result.process_size)}',
        f' Added file size information entries: {rebuild_result.added_size_count}',
        f' Added file hash entries: {rebuild_result.added_hash_count}',
    ]
    if rebuild_result.error_count > 0:
        summary_lines.append(f' Errors during rebuild: {rebuild_result.error_count} (see log for details)')
    summary_lines += [
        '\nSHA256SUMS verification results:',
        f' Successfully verified files: {rebuild_result.hash_verified_count}',
        f' File hash mismatches: {rebuild_result.hash_mismatch_count}',
        f' File hashes not found, newly stored: {rebuild_result.hash_not_found_count}',
    ]

    summary_file = backup_root / f'{timestamp}-rebuild-summary.txt'
    with TeeStdoutContext(summary_file):
        for line in summary_lines:
            print(line)
        print()

    logger.info('Rebuild completed. Summary created: %s', summary_file)

    return rebuild_result
|
|
@@ -0,0 +1,36 @@
|
|
|
1
|
+
import os
|
|
2
|
+
import unittest.util
|
|
3
|
+
from pathlib import Path
|
|
4
|
+
|
|
5
|
+
from bx_py_utils.test_utils.deny_requests import deny_any_real_request
|
|
6
|
+
from cli_base.cli_tools.verbosity import MAX_LOG_LEVEL, setup_logging
|
|
7
|
+
from rich import print # noqa
|
|
8
|
+
from typeguard import install_import_hook
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
# Check type annotations via typeguard in all tests:
|
|
12
|
+
# Executed as a side effect of importing this test package.
# NOTE(review): presumably the hook only instruments PyHardLinkBackup modules that are
# imported after this call - confirm against the typeguard documentation.
install_import_hook(packages=('PyHardLinkBackup',))
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
def pre_configure_tests() -> None:
    """
    Set up the test environment: assert output length, request blocking and logging.

    The announced path is shown relative to the current working directory if possible.
    If this file is not located below the working directory, the path is shown unchanged
    instead of failing with a ValueError.
    """
    this_file = Path(__file__)
    try:
        shown_path = this_file.relative_to(Path.cwd())
    except ValueError:
        # The tests were started from a directory that is not a parent of this file.
        # The path is only used for the message below, so don't abort the test setup.
        shown_path = this_file
    print(f'Configure unittests via "load_tests Protocol" from {shown_path}')

    # Hacky way to display more "assert"-Context in failing tests:
    # Raise the private maximum length, but keep its distance to the minimum diff length.
    min_max_diff = unittest.util._MAX_LENGTH - unittest.util._MIN_DIFF_LEN
    unittest.util._MAX_LENGTH = int(os.environ.get('UNITTEST_MAX_LENGTH', 2000))
    unittest.util._MIN_DIFF_LEN = unittest.util._MAX_LENGTH - min_max_diff

    # Deny any real request, because tests should mock all requests:
    # NOTE(review): presumably blocked at socket/urllib3 level - confirm in bx_py_utils.
    deny_any_real_request()

    # Display DEBUG logs in tests:
    setup_logging(verbosity=MAX_LOG_LEVEL)
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
def load_tests(loader, tests, pattern):
    """
    Use unittest "load_tests Protocol" as a hook to setup test environment before running tests.
    https://docs.python.org/3/library/unittest.html#load-tests-protocol

    The given ``tests`` are not used: all tests are collected by a discovery
    that starts in the directory of this file.
    """
    pre_configure_tests()

    # unittest passes pattern=None if this package is loaded by name and not via discovery.
    # discover() needs a real filename pattern, so fall back to its documented default.
    return loader.discover(start_dir=Path(__file__).parent, pattern=pattern or 'test*.py')
|