PyHardLinkBackup 1.8.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (45) hide show
  1. PyHardLinkBackup/__init__.py +7 -0
  2. PyHardLinkBackup/__main__.py +10 -0
  3. PyHardLinkBackup/backup.py +297 -0
  4. PyHardLinkBackup/cli_app/__init__.py +41 -0
  5. PyHardLinkBackup/cli_app/phlb.py +136 -0
  6. PyHardLinkBackup/cli_dev/__init__.py +70 -0
  7. PyHardLinkBackup/cli_dev/__main__.py +10 -0
  8. PyHardLinkBackup/cli_dev/benchmark.py +138 -0
  9. PyHardLinkBackup/cli_dev/code_style.py +12 -0
  10. PyHardLinkBackup/cli_dev/debugging.py +47 -0
  11. PyHardLinkBackup/cli_dev/packaging.py +62 -0
  12. PyHardLinkBackup/cli_dev/shell_completion.py +23 -0
  13. PyHardLinkBackup/cli_dev/testing.py +52 -0
  14. PyHardLinkBackup/cli_dev/update_readme_history.py +33 -0
  15. PyHardLinkBackup/compare_backup.py +259 -0
  16. PyHardLinkBackup/constants.py +18 -0
  17. PyHardLinkBackup/logging_setup.py +124 -0
  18. PyHardLinkBackup/rebuild_databases.py +217 -0
  19. PyHardLinkBackup/tests/__init__.py +36 -0
  20. PyHardLinkBackup/tests/test_backup.py +1167 -0
  21. PyHardLinkBackup/tests/test_compare_backup.py +167 -0
  22. PyHardLinkBackup/tests/test_doc_write.py +26 -0
  23. PyHardLinkBackup/tests/test_doctests.py +10 -0
  24. PyHardLinkBackup/tests/test_project_setup.py +46 -0
  25. PyHardLinkBackup/tests/test_readme.py +75 -0
  26. PyHardLinkBackup/tests/test_readme_history.py +9 -0
  27. PyHardLinkBackup/tests/test_rebuild_database.py +266 -0
  28. PyHardLinkBackup/utilities/__init__.py +0 -0
  29. PyHardLinkBackup/utilities/file_hash_database.py +62 -0
  30. PyHardLinkBackup/utilities/file_size_database.py +46 -0
  31. PyHardLinkBackup/utilities/filesystem.py +257 -0
  32. PyHardLinkBackup/utilities/humanize.py +39 -0
  33. PyHardLinkBackup/utilities/rich_utils.py +237 -0
  34. PyHardLinkBackup/utilities/sha256sums.py +61 -0
  35. PyHardLinkBackup/utilities/tee.py +40 -0
  36. PyHardLinkBackup/utilities/tests/__init__.py +0 -0
  37. PyHardLinkBackup/utilities/tests/test_file_hash_database.py +153 -0
  38. PyHardLinkBackup/utilities/tests/test_file_size_database.py +151 -0
  39. PyHardLinkBackup/utilities/tests/test_filesystem.py +167 -0
  40. PyHardLinkBackup/utilities/tests/unittest_utilities.py +78 -0
  41. PyHardLinkBackup/utilities/tyro_cli_shared_args.py +29 -0
  42. pyhardlinkbackup-1.8.1.dist-info/METADATA +700 -0
  43. pyhardlinkbackup-1.8.1.dist-info/RECORD +45 -0
  44. pyhardlinkbackup-1.8.1.dist-info/WHEEL +4 -0
  45. pyhardlinkbackup-1.8.1.dist-info/entry_points.txt +2 -0
@@ -0,0 +1,124 @@
1
+ import logging
2
+ import sys
3
+ from pathlib import Path
4
+ from typing import Annotated, Literal
5
+
6
+ import tyro
7
+ from bx_py_utils.path import assert_is_dir
8
+ from rich import (
9
+ get_console,
10
+ print, # noqa
11
+ )
12
+ from rich.logging import RichHandler
13
+
14
+
15
logger = logging.getLogger(__name__)

# The log level names accepted on the command line (lowercase, per tyro convention):
LogLevelLiteral = Literal['debug', 'info', 'warning', 'error']


# tyro CLI argument type: log level for console output.
TyroConsoleLogLevelArgType = Annotated[
    LogLevelLiteral,
    tyro.conf.arg(
        help='Log level for console logging.',
    ),
]
DEFAULT_CONSOLE_LOG_LEVEL: TyroConsoleLogLevelArgType = 'warning'


# tyro CLI argument type: log level for the log file (more verbose by default).
TyroLogFileLevelArgType = Annotated[
    LogLevelLiteral,
    tyro.conf.arg(
        help='Log level for the log file',
    ),
]
DEFAULT_LOG_FILE_LEVEL: TyroLogFileLevelArgType = 'info'
36
+
37
+
38
+ def log_level_name2int(level_name: str) -> int:
39
+ level_name = level_name.upper()
40
+ level_mapping = logging.getLevelNamesMapping()
41
+ try:
42
+ return level_mapping[level_name]
43
+ except KeyError as err:
44
+ raise ValueError(f'Invalid log level name: {level_name}') from err
45
+
46
+
47
# Shared rich console instance, used for all direct output of this module:
console = get_console()
48
+
49
+
50
class LoggingManager:
    """
    Set up console logging (via a rich `RichHandler`) and, on demand, file logging.

    Also installs `self.log_unhandled_exception` as `sys.excepthook`, so that
    unhandled exceptions are sent to the configured log handlers.
    """

    def __init__(
        self,
        *,
        console_level: TyroConsoleLogLevelArgType,
        file_level: TyroLogFileLevelArgType,
    ):
        self.console_level_name = console_level
        self.console_level: int = log_level_name2int(console_level)
        self.file_level_name = file_level
        self.file_level: int = log_level_name2int(file_level)

        # The root logger must let records through for the most verbose handler:
        self.lowest_level = min(self.console_level, self.file_level)

        # Bugfix: `console_level` is the level *name* (e.g.: 'debug'), so the old
        # comparison against the int `logging.DEBUG` was always False and the
        # verbose format was unreachable. Compare the numeric level instead:
        if self.console_level == logging.DEBUG:
            # Include the logger name to see where a message comes from:
            log_format = '(%(name)s) %(message)s'
        else:
            log_format = '%(message)s'

        console.print(
            # Bugfix: close the rich markup tag with "[/bold]" (was "[bold]" twice):
            f'(Set [bold]console[/bold] log level: [cyan]{self.console_level_name}[/cyan])',
            justify='right',
        )
        handler = RichHandler(console=console, omit_repeated_times=False)
        handler.setLevel(self.console_level)
        logging.basicConfig(
            level=self.lowest_level,
            format=log_format,
            datefmt='[%x %X.%f]',
            handlers=[handler],
            force=True,  # Remove/replace any previously installed handlers
        )
        sys.excepthook = self.log_unhandled_exception

    def start_file_logging(self, log_file: Path):
        """Attach a FileHandler for `log_file` (append mode) to the root logger."""
        console.print(
            f'(initialize log file [bold]{log_file}[/bold] with level: [cyan]{self.file_level_name}[/cyan])',
            justify='right',
        )

        # The parent directory must already exist:
        assert_is_dir(log_file.parent)

        root_logger = logging.getLogger()

        file_handler = logging.FileHandler(log_file, mode='a', encoding='utf-8')
        file_handler.setLevel(self.file_level)

        formatter = logging.Formatter(
            '%(asctime)s - %(name)s - %(levelname)s - %(message)s',
            datefmt='%Y-%m-%d %H:%M:%S',
        )
        file_handler.setFormatter(formatter)

        root_logger.addHandler(file_handler)

    def log_unhandled_exception(self, exc_type, exc_value, exc_traceback):
        """`sys.excepthook` replacement: route unhandled exceptions into logging."""
        if issubclass(exc_type, KeyboardInterrupt):
            # Ctrl-C is not an error: log a short note and delegate to the default hook.
            logger.info('Program interrupted by user (KeyboardInterrupt). Exiting...')
            sys.__excepthook__(exc_type, exc_value, exc_traceback)
        else:
            logger.exception(
                'Unhandled exception occurred:',
                exc_info=(exc_type, exc_value, exc_traceback),
            )
114
+
115
+
116
class NoopLoggingManager(LoggingManager):
    """
    Only for tests: A logging manager that does nothing.
    """

    def __init__(self, *args, **kwargs):
        # Deliberately skip LoggingManager.__init__: no handlers, no excepthook.
        pass

    def start_file_logging(self, log_file: Path):
        # No-op: tests should not create log files on disk.
        pass
@@ -0,0 +1,217 @@
1
+ import dataclasses
2
+ import datetime
3
+ import logging
4
+ import os
5
+ import sys
6
+ import time
7
+ from pathlib import Path
8
+
9
+ from PyHardLinkBackup.logging_setup import LoggingManager
10
+ from PyHardLinkBackup.utilities.file_hash_database import FileHashDatabase
11
+ from PyHardLinkBackup.utilities.file_size_database import FileSizeDatabase
12
+ from PyHardLinkBackup.utilities.filesystem import hash_file, humanized_fs_scan, iter_scandir_files
13
+ from PyHardLinkBackup.utilities.humanize import PrintTimingContextManager, human_filesize
14
+ from PyHardLinkBackup.utilities.rich_utils import DisplayFileTreeProgress
15
+ from PyHardLinkBackup.utilities.sha256sums import check_sha256sums, store_hash
16
+ from PyHardLinkBackup.utilities.tee import TeeStdoutContext
17
+
18
+
19
+ logger = logging.getLogger(__name__)
20
+
21
+
22
@dataclasses.dataclass
class RebuildResult:
    """Mutable counters collected while rebuilding the deduplication databases."""

    # Overall progress (these two also feed the progress display):
    process_count: int = 0
    process_size: int = 0
    # Entries newly added to the size / hash "databases":
    added_size_count: int = 0
    added_hash_count: int = 0
    # Files that raised an exception during processing:
    error_count: int = 0
    # SHA256SUMS verification results:
    hash_verified_count: int = 0
    hash_mismatch_count: int = 0
    hash_not_found_count: int = 0
    # Hard link statistics (files sharing an inode are hard links):
    unique_inode_count: int = 0
    skip_by_inode_count: int = 0
38
+
39
+
40
def rebuild_one_file(
    *,
    backup_root: Path,
    entry: os.DirEntry,
    size_db: FileSizeDatabase,
    hash_db: FileHashDatabase,
    seen_inodes: set,
    skip_same_inode: bool,
    rebuild_result: RebuildResult,
    progress: DisplayFileTreeProgress,
):
    """
    Process one file during a database rebuild: hash it, register size/hash in
    the deduplication databases and verify (or store) its SHA256SUMS entry.

    Mutates `seen_inodes` and the counters on `rebuild_result` in place.
    """
    inode = entry.inode()
    if inode not in seen_inodes:
        seen_inodes.add(inode)
    else:
        # A repeated inode means this entry is a hard link to an already seen file:
        if skip_same_inode:
            rebuild_result.skip_by_inode_count += 1
            # Update counters used in progress display:
            rebuild_result.process_size += entry.stat().st_size
            rebuild_result.process_count += 1
            return

    file_path = Path(entry.path)

    # We should ignore all files in the root backup directory itself
    # e.g.: Our *-summary.txt and *.log files
    if file_path.parent == backup_root:
        return

    rebuild_result.process_count += 1

    if entry.name == 'SHA256SUMS':
        # Skip existing SHA256SUMS files
        return

    size = entry.stat().st_size
    rebuild_result.process_size += size
    if size < size_db.MIN_SIZE:
        # Small files will never deduplicate, skip them
        return

    file_hash = hash_file(file_path, progress=progress, total_size=size)

    if size not in size_db:
        size_db.add(size)
        rebuild_result.added_size_count += 1

    if file_hash not in hash_db:
        hash_db[file_hash] = file_path
        rebuild_result.added_hash_count += 1

    # We have calculated the current hash of the file,
    # Let's check if we can verify it, too:
    # (Fix: removed a redundant second `file_path = Path(entry.path)` assignment here.)
    compare_result = check_sha256sums(
        file_path=file_path,
        file_hash=file_hash,
    )
    if compare_result is True:
        rebuild_result.hash_verified_count += 1
    elif compare_result is False:
        rebuild_result.hash_mismatch_count += 1
    elif compare_result is None:
        # No stored hash exists yet -> persist the one we just computed:
        rebuild_result.hash_not_found_count += 1
        store_hash(
            file_path=file_path,
            file_hash=file_hash,
        )
108
+
109
+
110
def rebuild(
    backup_root: Path,
    skip_same_inode: bool,
    log_manager: LoggingManager,
) -> RebuildResult:
    """
    Rebuild the size/hash deduplication databases by scanning all files below
    `backup_root` (the `.phlb` config directory is excluded).

    Writes a `*-rebuild.log` file and a `*-rebuild-summary.txt` into the backup
    root and returns the collected counters. Exits the process (exit code 1) if
    `backup_root` is missing or lacks the hidden `.phlb` configuration directory.
    """
    backup_root = backup_root.resolve()
    if not backup_root.is_dir():
        print(f'Error: Backup directory "{backup_root}" does not exist!')
        sys.exit(1)

    # Sanity check: a real backup root contains our hidden config directory.
    phlb_conf_dir = backup_root / '.phlb'
    if not phlb_conf_dir.is_dir():
        print(
            f'Error: Backup directory "{backup_root}" seems to be wrong:'
            f' Our hidden ".phlb" configuration directory is missing!'
        )
        sys.exit(1)

    timestamp = datetime.datetime.now().strftime('%Y-%m-%d-%H%M%S')
    log_manager.start_file_logging(log_file=backup_root / f'{timestamp}-rebuild.log')

    # First pass over the filesystem: collect totals for the progress display.
    with PrintTimingContextManager('Filesystem scan completed in'):
        file_count, total_size = humanized_fs_scan(
            path=backup_root,
            one_file_system=False,
            src_device_id=None,
            excludes={'.phlb'},
        )

    # We should ignore all files in the root backup directory itself
    # e.g.: Our *-summary.txt and *.log files
    for file in backup_root.iterdir():
        if file.is_file():
            file_count -= 1
            total_size -= file.stat().st_size

    with DisplayFileTreeProgress(
        description=f'Rebuild {backup_root}...',
        total_file_count=file_count,
        total_size=total_size,
    ) as progress:
        # "Databases" for deduplication
        size_db = FileSizeDatabase(phlb_conf_dir)
        hash_db = FileHashDatabase(backup_root, phlb_conf_dir)

        # Inodes already processed -> used to detect hard links:
        seen_inodes = set()

        rebuild_result = RebuildResult()

        # Monotonic deadline for the next (rate-limited) progress update:
        next_update = 0
        for entry in iter_scandir_files(
            path=backup_root,
            one_file_system=False,
            src_device_id=None,
            excludes={'.phlb'},
        ):
            try:
                rebuild_one_file(
                    backup_root=backup_root,
                    entry=entry,
                    size_db=size_db,
                    hash_db=hash_db,
                    seen_inodes=seen_inodes,
                    skip_same_inode=skip_same_inode,
                    rebuild_result=rebuild_result,
                    progress=progress,
                )
            except Exception as err:
                # Don't abort the whole rebuild because of one broken file:
                logger.exception(f'Backup {entry.path} {err.__class__.__name__}: {err}')
                rebuild_result.error_count += 1
            else:
                # Throttle progress updates to at most every 0.5 seconds:
                now = time.monotonic()
                if now >= next_update:
                    progress.update(
                        completed_file_count=rebuild_result.process_count, completed_size=rebuild_result.process_size
                    )
                    next_update = now + 0.5

        # Finalize progress indicator values:
        progress.update(completed_file_count=rebuild_result.process_count, completed_size=rebuild_result.process_size)

    rebuild_result.unique_inode_count = len(seen_inodes)

    # Print the summary to stdout AND duplicate it into the summary file:
    summary_file = backup_root / f'{timestamp}-rebuild-summary.txt'
    with TeeStdoutContext(summary_file):
        print(f'\nRebuild "{backup_root}" completed:')
        print(f' Total files processed: {rebuild_result.process_count}')
        print(f' Total size processed: {human_filesize(rebuild_result.process_size)}')

        print(f' Unique inodes count: {rebuild_result.unique_inode_count}')
        print(f' Skipped files by inode: {rebuild_result.skip_by_inode_count}')

        print(f' Added file size information entries: {rebuild_result.added_size_count}')
        print(f' Added file hash entries: {rebuild_result.added_hash_count}')

        if rebuild_result.error_count > 0:
            print(f' Errors during rebuild: {rebuild_result.error_count} (see log for details)')

        print('\nSHA256SUMS verification results:')
        print(f' Successfully verified files: {rebuild_result.hash_verified_count}')
        print(f' File hash mismatches: {rebuild_result.hash_mismatch_count}')
        print(f' File hashes not found, newly stored: {rebuild_result.hash_not_found_count}')

        print()

    logger.info('Rebuild completed. Summary created: %s', summary_file)

    return rebuild_result
@@ -0,0 +1,36 @@
1
+ import os
2
+ import unittest.util
3
+ from pathlib import Path
4
+
5
+ from bx_py_utils.test_utils.deny_requests import deny_any_real_request
6
+ from cli_base.cli_tools.verbosity import MAX_LOG_LEVEL, setup_logging
7
+ from rich import print # noqa
8
+ from typeguard import install_import_hook
9
+
10
+
11
# Check type annotations via typeguard in all tests:
# NOTE: the import hook must be installed before any 'PyHardLinkBackup' module
# is imported, otherwise those modules are not instrumented.
install_import_hook(packages=('PyHardLinkBackup',))
13
+
14
+
15
def pre_configure_tests() -> None:
    """Set up the global test environment (called once via the "load_tests" protocol)."""
    print(f'Configure unittests via "load_tests Protocol" from {Path(__file__).relative_to(Path.cwd())}')

    # Hacky way to display more "assert"-Context in failing tests:
    # Keep the original distance between _MAX_LENGTH and _MIN_DIFF_LEN constant,
    # while allowing _MAX_LENGTH to be overridden via environment variable.
    _MIN_MAX_DIFF = unittest.util._MAX_LENGTH - unittest.util._MIN_DIFF_LEN
    unittest.util._MAX_LENGTH = int(os.environ.get('UNITTEST_MAX_LENGTH', 2000))
    unittest.util._MIN_DIFF_LEN = unittest.util._MAX_LENGTH - _MIN_MAX_DIFF

    # Deny any real network request (via socket/urllib3): tests should mock all requests.
    deny_any_real_request()

    # Display DEBUG logs in tests:
    setup_logging(verbosity=MAX_LOG_LEVEL)
28
+
29
+
30
def load_tests(loader, tests, pattern):
    """
    Hook into the unittest "load_tests Protocol" to prepare the test environment
    before discovering and running any tests.
    https://docs.python.org/3/library/unittest.html#load-tests-protocol
    """
    pre_configure_tests()
    tests_dir = Path(__file__).parent
    return loader.discover(start_dir=tests_dir, pattern=pattern)