PyHardLinkBackup 1.5.0 (pyhardlinkbackup-1.5.0-py3-none-any.whl)

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (42)
  1. PyHardLinkBackup/__init__.py +7 -0
  2. PyHardLinkBackup/__main__.py +10 -0
  3. PyHardLinkBackup/backup.py +261 -0
  4. PyHardLinkBackup/cli_app/__init__.py +41 -0
  5. PyHardLinkBackup/cli_app/phlb.py +123 -0
  6. PyHardLinkBackup/cli_dev/__init__.py +70 -0
  7. PyHardLinkBackup/cli_dev/benchmark.py +138 -0
  8. PyHardLinkBackup/cli_dev/code_style.py +12 -0
  9. PyHardLinkBackup/cli_dev/packaging.py +65 -0
  10. PyHardLinkBackup/cli_dev/shell_completion.py +23 -0
  11. PyHardLinkBackup/cli_dev/testing.py +52 -0
  12. PyHardLinkBackup/cli_dev/update_readme_history.py +33 -0
  13. PyHardLinkBackup/compare_backup.py +212 -0
  14. PyHardLinkBackup/constants.py +16 -0
  15. PyHardLinkBackup/logging_setup.py +124 -0
  16. PyHardLinkBackup/rebuild_databases.py +176 -0
  17. PyHardLinkBackup/tests/__init__.py +36 -0
  18. PyHardLinkBackup/tests/test_backup.py +628 -0
  19. PyHardLinkBackup/tests/test_compare_backup.py +86 -0
  20. PyHardLinkBackup/tests/test_doc_write.py +26 -0
  21. PyHardLinkBackup/tests/test_doctests.py +10 -0
  22. PyHardLinkBackup/tests/test_project_setup.py +46 -0
  23. PyHardLinkBackup/tests/test_readme.py +75 -0
  24. PyHardLinkBackup/tests/test_readme_history.py +9 -0
  25. PyHardLinkBackup/tests/test_rebuild_database.py +224 -0
  26. PyHardLinkBackup/utilities/__init__.py +0 -0
  27. PyHardLinkBackup/utilities/file_hash_database.py +62 -0
  28. PyHardLinkBackup/utilities/file_size_database.py +46 -0
  29. PyHardLinkBackup/utilities/filesystem.py +158 -0
  30. PyHardLinkBackup/utilities/humanize.py +39 -0
  31. PyHardLinkBackup/utilities/rich_utils.py +99 -0
  32. PyHardLinkBackup/utilities/sha256sums.py +61 -0
  33. PyHardLinkBackup/utilities/tee.py +40 -0
  34. PyHardLinkBackup/utilities/tests/__init__.py +0 -0
  35. PyHardLinkBackup/utilities/tests/test_file_hash_database.py +143 -0
  36. PyHardLinkBackup/utilities/tests/test_file_size_database.py +138 -0
  37. PyHardLinkBackup/utilities/tests/test_filesystem.py +126 -0
  38. PyHardLinkBackup/utilities/tyro_cli_shared_args.py +12 -0
  39. pyhardlinkbackup-1.5.0.dist-info/METADATA +600 -0
  40. pyhardlinkbackup-1.5.0.dist-info/RECORD +42 -0
  41. pyhardlinkbackup-1.5.0.dist-info/WHEEL +4 -0
  42. pyhardlinkbackup-1.5.0.dist-info/entry_points.txt +3 -0
PyHardLinkBackup/rebuild_databases.py
@@ -0,0 +1,176 @@
+import dataclasses
+import datetime
+import logging
+import os
+import sys
+import time
+from pathlib import Path
+
+from PyHardLinkBackup.logging_setup import LoggingManager
+from PyHardLinkBackup.utilities.file_hash_database import FileHashDatabase
+from PyHardLinkBackup.utilities.file_size_database import FileSizeDatabase
+from PyHardLinkBackup.utilities.filesystem import hash_file, humanized_fs_scan, iter_scandir_files
+from PyHardLinkBackup.utilities.humanize import PrintTimingContextManager, human_filesize
+from PyHardLinkBackup.utilities.rich_utils import DisplayFileTreeProgress
+from PyHardLinkBackup.utilities.sha256sums import check_sha256sums, store_hash
+from PyHardLinkBackup.utilities.tee import TeeStdoutContext
+
+
+logger = logging.getLogger(__name__)
+
+
+@dataclasses.dataclass
+class RebuildResult:
+    process_count: int = 0
+    process_size: int = 0
+    #
+    added_size_count: int = 0
+    added_hash_count: int = 0
+    #
+    error_count: int = 0
+    #
+    hash_verified_count: int = 0
+    hash_mismatch_count: int = 0
+    hash_not_found_count: int = 0
+
+
+def rebuild_one_file(
+    *,
+    backup_root: Path,
+    entry: os.DirEntry,
+    size_db: FileSizeDatabase,
+    hash_db: FileHashDatabase,
+    rebuild_result: RebuildResult,
+):
+    file_path = Path(entry.path)
+
+    # We should ignore all files in the root backup directory itself
+    # e.g.: Our *-summary.txt and *.log files
+    if file_path.parent == backup_root:
+        return
+
+    rebuild_result.process_count += 1
+
+    if entry.name == 'SHA256SUMS':
+        # Skip existing SHA256SUMS files
+        return
+
+    size = entry.stat().st_size
+    rebuild_result.process_size += size
+
+    if size < size_db.MIN_SIZE:
+        # Small files will never deduplicate, skip them
+        return
+
+    file_hash = hash_file(file_path)
+
+    if size not in size_db:
+        size_db.add(size)
+        rebuild_result.added_size_count += 1
+
+    if file_hash not in hash_db:
+        hash_db[file_hash] = file_path
+        rebuild_result.added_hash_count += 1
+
+    # We have calculated the current hash of the file,
+    # Let's check if we can verify it, too:
+    file_path = Path(entry.path)
+    compare_result = check_sha256sums(
+        file_path=file_path,
+        file_hash=file_hash,
+    )
+    if compare_result is True:
+        rebuild_result.hash_verified_count += 1
+    elif compare_result is False:
+        rebuild_result.hash_mismatch_count += 1
+    elif compare_result is None:
+        rebuild_result.hash_not_found_count += 1
+        store_hash(
+            file_path=file_path,
+            file_hash=file_hash,
+        )
+
+
+def rebuild(
+    backup_root: Path,
+    log_manager: LoggingManager,
+) -> RebuildResult:
+    backup_root = backup_root.resolve()
+    if not backup_root.is_dir():
+        print(f'Error: Backup directory "{backup_root}" does not exist!')
+        sys.exit(1)
+
+    phlb_conf_dir = backup_root / '.phlb'
+    if not phlb_conf_dir.is_dir():
+        print(
+            f'Error: Backup directory "{backup_root}" seems to be wrong:'
+            f' Our hidden ".phlb" configuration directory is missing!'
+        )
+        sys.exit(1)
+
+    timestamp = datetime.datetime.now().strftime('%Y-%m-%d-%H%M%S')
+    log_manager.start_file_logging(log_file=backup_root / f'{timestamp}-rebuild.log')
+
+    with PrintTimingContextManager('Filesystem scan completed in'):
+        file_count, total_size = humanized_fs_scan(backup_root, excludes={'.phlb'})
+
+    # We should ignore all files in the root backup directory itself
+    # e.g.: Our *-summary.txt and *.log files
+    for file in backup_root.iterdir():
+        if file.is_file():
+            file_count -= 1
+            total_size -= file.stat().st_size
+
+    with DisplayFileTreeProgress(file_count, total_size) as progress:
+        # "Databases" for deduplication
+        size_db = FileSizeDatabase(phlb_conf_dir)
+        hash_db = FileHashDatabase(backup_root, phlb_conf_dir)
+
+        rebuild_result = RebuildResult()
+
+        next_update = 0
+        for entry in iter_scandir_files(backup_root, excludes={'.phlb'}):
+            try:
+                rebuild_one_file(
+                    backup_root=backup_root,
+                    entry=entry,
+                    size_db=size_db,
+                    hash_db=hash_db,
+                    rebuild_result=rebuild_result,
+                )
+            except Exception as err:
+                logger.exception(f'Backup {entry.path} {err.__class__.__name__}: {err}')
+                rebuild_result.error_count += 1
+            else:
+                now = time.monotonic()
+                if now >= next_update:
+                    progress.update(
+                        completed_file_count=rebuild_result.process_count, completed_size=rebuild_result.process_size
+                    )
+                    next_update = now + 0.5
+
+        # Finalize progress indicator values:
+        progress.update(completed_file_count=rebuild_result.process_count, completed_size=rebuild_result.process_size)
+
+    summary_file = backup_root / f'{timestamp}-rebuild-summary.txt'
+    with TeeStdoutContext(summary_file):
+        print(f'\nRebuild "{backup_root}" completed:')
+        print(f' Total files processed: {rebuild_result.process_count}')
+        print(f' Total size processed: {human_filesize(rebuild_result.process_size)}')
+
+        print(f' Added file size information entries: {rebuild_result.added_size_count}')
+        print(f' Added file hash entries: {rebuild_result.added_hash_count}')
+
+        if rebuild_result.error_count > 0:
+            print(f' Errors during rebuild: {rebuild_result.error_count} (see log for details)')
+
+        print('\nSHA256SUMS verification results:')
+        print(f' Successfully verified files: {rebuild_result.hash_verified_count}')
+        print(f' File hash mismatches: {rebuild_result.hash_mismatch_count}')
+        print(f' File hashes not found, newly stored: {rebuild_result.hash_not_found_count}')

+        print()
+
+    logger.info('Rebuild completed. Summary created: %s', summary_file)
+
+    return rebuild_result
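
As a usage note: a minimal sketch of how this rebuild step could be driven from a small script, assuming the package is installed. The no-argument LoggingManager() construction and the '/mnt/backups' path are assumptions for illustration; the diff above only shows that the manager must provide start_file_logging(log_file=...).

    from pathlib import Path

    from PyHardLinkBackup.logging_setup import LoggingManager
    from PyHardLinkBackup.rebuild_databases import rebuild

    # Assumption: the exact LoggingManager constructor arguments are not shown in this diff.
    log_manager = LoggingManager()
    result = rebuild(backup_root=Path('/mnt/backups'), log_manager=log_manager)

    # RebuildResult fields come straight from the dataclass defined above:
    if result.error_count or result.hash_mismatch_count:
        raise SystemExit(
            f'{result.error_count} errors, {result.hash_mismatch_count} hash mismatches during rebuild'
        )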
PyHardLinkBackup/tests/__init__.py
@@ -0,0 +1,36 @@
+import os
+import unittest.util
+from pathlib import Path
+
+from bx_py_utils.test_utils.deny_requests import deny_any_real_request
+from cli_base.cli_tools.verbosity import MAX_LOG_LEVEL, setup_logging
+from rich import print  # noqa
+from typeguard import install_import_hook
+
+
+# Check type annotations via typeguard in all tests:
+install_import_hook(packages=('PyHardLinkBackup',))
+
+
+def pre_configure_tests() -> None:
+    print(f'Configure unittests via "load_tests Protocol" from {Path(__file__).relative_to(Path.cwd())}')
+
+    # Hacky way to display more "assert" context in failing tests:
+    _MIN_MAX_DIFF = unittest.util._MAX_LENGTH - unittest.util._MIN_DIFF_LEN
+    unittest.util._MAX_LENGTH = int(os.environ.get('UNITTEST_MAX_LENGTH', 2000))
+    unittest.util._MIN_DIFF_LEN = unittest.util._MAX_LENGTH - _MIN_MAX_DIFF
+
+    # Deny any real request via socket/urllib3 because tests should mock all requests:
+    deny_any_real_request()
+
+    # Display DEBUG logs in tests:
+    setup_logging(verbosity=MAX_LOG_LEVEL)
+
+
+def load_tests(loader, tests, pattern):
+    """
+    Use the unittest "load_tests Protocol" as a hook to set up the test environment before running tests.
+    https://docs.python.org/3/library/unittest.html#load-tests-protocol
+    """
+    pre_configure_tests()
+    return loader.discover(start_dir=Path(__file__).parent, pattern=pattern)
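
As a usage note: a minimal sketch of how this "load_tests Protocol" hook fires, assuming the package is importable (installed or run from the source tree). Pointing unittest discovery at the dotted package name imports PyHardLinkBackup.tests, which installs the typeguard import hook, and load_tests() then runs pre_configure_tests() once before the test modules are collected.

    import unittest

    # Discovery of the dotted package name routes through load_tests() in
    # PyHardLinkBackup/tests/__init__.py, so pre_configure_tests() runs first:
    loader = unittest.TestLoader()
    suite = loader.discover('PyHardLinkBackup.tests')
    unittest.TextTestRunner(verbosity=2).run(suite)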