PyHardLinkBackup 1.5.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (42) hide show
  1. PyHardLinkBackup/__init__.py +7 -0
  2. PyHardLinkBackup/__main__.py +10 -0
  3. PyHardLinkBackup/backup.py +261 -0
  4. PyHardLinkBackup/cli_app/__init__.py +41 -0
  5. PyHardLinkBackup/cli_app/phlb.py +123 -0
  6. PyHardLinkBackup/cli_dev/__init__.py +70 -0
  7. PyHardLinkBackup/cli_dev/benchmark.py +138 -0
  8. PyHardLinkBackup/cli_dev/code_style.py +12 -0
  9. PyHardLinkBackup/cli_dev/packaging.py +65 -0
  10. PyHardLinkBackup/cli_dev/shell_completion.py +23 -0
  11. PyHardLinkBackup/cli_dev/testing.py +52 -0
  12. PyHardLinkBackup/cli_dev/update_readme_history.py +33 -0
  13. PyHardLinkBackup/compare_backup.py +212 -0
  14. PyHardLinkBackup/constants.py +16 -0
  15. PyHardLinkBackup/logging_setup.py +124 -0
  16. PyHardLinkBackup/rebuild_databases.py +176 -0
  17. PyHardLinkBackup/tests/__init__.py +36 -0
  18. PyHardLinkBackup/tests/test_backup.py +628 -0
  19. PyHardLinkBackup/tests/test_compare_backup.py +86 -0
  20. PyHardLinkBackup/tests/test_doc_write.py +26 -0
  21. PyHardLinkBackup/tests/test_doctests.py +10 -0
  22. PyHardLinkBackup/tests/test_project_setup.py +46 -0
  23. PyHardLinkBackup/tests/test_readme.py +75 -0
  24. PyHardLinkBackup/tests/test_readme_history.py +9 -0
  25. PyHardLinkBackup/tests/test_rebuild_database.py +224 -0
  26. PyHardLinkBackup/utilities/__init__.py +0 -0
  27. PyHardLinkBackup/utilities/file_hash_database.py +62 -0
  28. PyHardLinkBackup/utilities/file_size_database.py +46 -0
  29. PyHardLinkBackup/utilities/filesystem.py +158 -0
  30. PyHardLinkBackup/utilities/humanize.py +39 -0
  31. PyHardLinkBackup/utilities/rich_utils.py +99 -0
  32. PyHardLinkBackup/utilities/sha256sums.py +61 -0
  33. PyHardLinkBackup/utilities/tee.py +40 -0
  34. PyHardLinkBackup/utilities/tests/__init__.py +0 -0
  35. PyHardLinkBackup/utilities/tests/test_file_hash_database.py +143 -0
  36. PyHardLinkBackup/utilities/tests/test_file_size_database.py +138 -0
  37. PyHardLinkBackup/utilities/tests/test_filesystem.py +126 -0
  38. PyHardLinkBackup/utilities/tyro_cli_shared_args.py +12 -0
  39. pyhardlinkbackup-1.5.0.dist-info/METADATA +600 -0
  40. pyhardlinkbackup-1.5.0.dist-info/RECORD +42 -0
  41. pyhardlinkbackup-1.5.0.dist-info/WHEEL +4 -0
  42. pyhardlinkbackup-1.5.0.dist-info/entry_points.txt +3 -0
@@ -0,0 +1,65 @@
1
+ import logging
2
+
3
+ from cli_base.cli_tools.dev_tools import run_unittest_cli
4
+ from cli_base.cli_tools.subprocess_utils import ToolsExecutor
5
+ from cli_base.cli_tools.verbosity import setup_logging
6
+ from cli_base.run_pip_audit import run_pip_audit
7
+ from cli_base.tyro_commands import TyroVerbosityArgType
8
+ from manageprojects.utilities.publish import publish_package
9
+
10
+ import PyHardLinkBackup
11
+ from PyHardLinkBackup.cli_dev import PACKAGE_ROOT, app
12
+
13
+
14
+ logger = logging.getLogger(__name__)
15
+
16
+
17
@app.command
def install():
    """
    Install requirements and 'PyHardLinkBackup' via pip as editable.
    """
    executor = ToolsExecutor(cwd=PACKAGE_ROOT)
    commands = (
        # First the requirements ...
        ('uv', 'sync'),
        # ... then the project itself as editable install, without touching dependencies:
        ('pip', 'install', '--no-deps', '-e', '.'),
    )
    for command in commands:
        executor.verbose_check_call(*command)
25
+
26
+
27
@app.command
def pip_audit(verbosity: TyroVerbosityArgType):
    """
    Run pip-audit check against current requirements files
    """
    # Logging is set up first, then the audit runs for the package root.
    setup_logging(verbosity=verbosity)
    audit_options = {
        'base_path': PACKAGE_ROOT,
        'verbosity': verbosity,
    }
    run_pip_audit(**audit_options)
34
+
35
+
36
@app.command
def update(verbosity: TyroVerbosityArgType):
    """
    Update dependencies (uv.lock) and git pre-commit hooks
    """
    setup_logging(verbosity=verbosity)

    executor = ToolsExecutor(cwd=PACKAGE_ROOT)

    # Upgrade the tooling itself, then refresh the lock file:
    upgrade_steps = (
        ('pip', 'install', '-U', 'pip'),
        ('pip', 'install', '-U', 'uv'),
        ('uv', 'lock', '--upgrade'),
    )
    for step in upgrade_steps:
        executor.verbose_check_call(*step)

    # Audit the freshly locked requirements before they get installed:
    run_pip_audit(base_path=PACKAGE_ROOT, verbosity=verbosity)

    # Install new dependencies in current .venv:
    executor.verbose_check_call('uv', 'sync')

    # Update git pre-commit hooks:
    executor.verbose_check_call('pre-commit', 'autoupdate')
56
+
57
+
58
@app.command
def publish():
    """
    Build and upload this project to PyPi
    """
    # Don't publish a broken state: the unittests run before anything is built or uploaded.
    run_unittest_cli(
        verbose=False,
        exit_after_run=False,  # presumably keeps the process running so the upload below happens
    )

    publish_package(
        module=PyHardLinkBackup,
        package_path=PACKAGE_ROOT,
    )
@@ -0,0 +1,23 @@
1
+ import logging
2
+
3
+ from cli_base.cli_tools.shell_completion import setup_tyro_shell_completion
4
+ from cli_base.cli_tools.verbosity import setup_logging
5
+ from cli_base.tyro_commands import TyroVerbosityArgType
6
+ from rich import print # noqa
7
+
8
+ from PyHardLinkBackup.cli_dev import app
9
+
10
+
11
+ logger = logging.getLogger(__name__)
12
+
13
+
14
@app.command
def shell_completion(verbosity: TyroVerbosityArgType = 1, remove: bool = False) -> None:
    """
    Setup shell completion for this CLI (Currently only for bash shell)
    """
    setup_logging(verbosity=verbosity)

    # `remove` is passed through unchanged to the completion helper.
    completion_options = {
        'prog_name': 'PyHardLinkBackup_dev_cli',
        'remove': remove,
    }
    setup_tyro_shell_completion(**completion_options)
@@ -0,0 +1,52 @@
1
+ from cli_base.cli_tools.dev_tools import run_coverage, run_nox, run_unittest_cli
2
+ from cli_base.cli_tools.subprocess_utils import verbose_check_call
3
+ from cli_base.cli_tools.test_utils.snapshot import UpdateTestSnapshotFiles
4
+ from cli_base.tyro_commands import TyroVerbosityArgType
5
+
6
+ from PyHardLinkBackup.cli_dev import PACKAGE_ROOT, app
7
+
8
+
9
@app.command
def mypy(verbosity: TyroVerbosityArgType):
    """Run Mypy (configured in pyproject.toml)"""
    # Any verbosity above zero turns on verbose output of the subprocess call.
    be_verbose = verbosity > 0
    verbose_check_call(
        'mypy',
        '.',
        cwd=PACKAGE_ROOT,
        verbose=be_verbose,
        exit_on_error=True,
    )
13
+
14
+
15
@app.command
def update_test_snapshot_files(verbosity: TyroVerbosityArgType):
    """
    Update all test snapshot files (by remove and recreate all snapshot files)
    """
    # Recreate snapshot files without error:
    snapshot_env = {'RAISE_SNAPSHOT_ERRORS': '0'}

    with UpdateTestSnapshotFiles(root_path=PACKAGE_ROOT, verbose=verbosity > 0):
        # Just recreate them by running tests:
        run_unittest_cli(
            extra_env=snapshot_env,
            verbose=verbosity > 1,
            exit_after_run=False,
        )
29
+
30
+
31
@app.command  # Dummy command
def test():
    """
    Run unittests
    """
    # Thin wrapper: the shared unittest runner is called with its default arguments.
    run_unittest_cli()
37
+
38
+
39
@app.command  # Dummy command
def coverage():
    """
    Run tests and show coverage report.
    """
    # Thin wrapper: the shared coverage runner is called with its default arguments.
    run_coverage()
45
+
46
+
47
@app.command  # Dummy "nox" command
def nox():
    """
    Run nox
    """
    # Thin wrapper: the shared nox runner is called with its default arguments.
    run_nox()
@@ -0,0 +1,33 @@
1
+ import logging
2
+ import sys
3
+ from pathlib import Path
4
+
5
+ from cli_base.cli_tools import git_history
6
+ from cli_base.cli_tools.verbosity import setup_logging
7
+ from cli_base.tyro_commands import TyroVerbosityArgType
8
+ from rich import print # noqa
9
+
10
+ from PyHardLinkBackup.cli_dev import app
11
+
12
+
13
+ logger = logging.getLogger(__name__)
14
+
15
+
16
@app.command
def update_readme_history(verbosity: TyroVerbosityArgType):
    """
    Update project history base on git commits/tags in README.md

    Will be exited with 1 if the README.md was updated otherwise with 0.

    Also, callable via e.g.:
        python -m cli_base update-readme-history -v
    """
    setup_logging(verbosity=verbosity)
    logger.debug('%s called. CWD: %s', __name__, Path.cwd())

    # The process exit code signals whether the README was changed:
    if git_history.update_readme_history(verbosity=verbosity):
        exit_code = 1
    else:
        exit_code = 0

    if verbosity:
        print(f'{exit_code=}')

    sys.exit(exit_code)
@@ -0,0 +1,212 @@
1
+ import dataclasses
2
+ import datetime
3
+ import logging
4
+ import os
5
+ import sys
6
+ import time
7
+ from pathlib import Path
8
+
9
+ from rich import print # noqa
10
+
11
+ from PyHardLinkBackup.logging_setup import LoggingManager
12
+ from PyHardLinkBackup.utilities.file_hash_database import FileHashDatabase
13
+ from PyHardLinkBackup.utilities.file_size_database import FileSizeDatabase
14
+ from PyHardLinkBackup.utilities.filesystem import (
15
+ hash_file,
16
+ humanized_fs_scan,
17
+ iter_scandir_files,
18
+ )
19
+ from PyHardLinkBackup.utilities.humanize import PrintTimingContextManager, human_filesize
20
+ from PyHardLinkBackup.utilities.rich_utils import DisplayFileTreeProgress
21
+ from PyHardLinkBackup.utilities.tee import TeeStdoutContext
22
+
23
+
24
+ logger = logging.getLogger(__name__)
25
+
26
+
27
@dataclasses.dataclass
class CompareResult:
    """
    Counters and paths collected while a source tree is compared with a backup.
    """

    # Backup directory the source tree is compared with, and the log file of this run:
    compare_dir: Path
    log_file: Path

    # Totals over all processed source files (also used for the progress display):
    total_file_count: int = 0
    total_size: int = 0

    # Differences found between source and compare directory:
    src_file_new_count: int = 0  # source file does not exist in the compare directory
    file_size_missmatch: int = 0  # file sizes differ
    file_hash_missmatch: int = 0  # same size, but the hashes differ

    # Deduplication database checks for files with identical content:
    small_file_count: int = 0  # below the size database minimum, so not expected in the databases
    size_db_missing_count: int = 0  # file size not found in the size database
    hash_db_missing_count: int = 0  # file hash not found in the hash database

    # Overall outcome:
    successful_file_count: int = 0  # size and hash are identical
    error_count: int = 0  # files whose comparison raised an exception
45
+
46
+
47
def compare_one_file(
    *,
    src_root: Path,
    entry: os.DirEntry,
    size_db: FileSizeDatabase,
    hash_db: FileHashDatabase,
    compare_dir: Path,
    compare_result: CompareResult,
) -> None:
    """
    Compare one source file with its counterpart below `compare_dir`.

    The outcome is recorded by incrementing the matching counters of
    `compare_result`; nothing is returned. The checks stop at the first
    difference found: missing compare file, different size, different hash.
    Files that pass all of these are counted as successful, after also being
    checked against the deduplication databases.
    """
    source_size = entry.stat().st_size

    # For the progress bars:
    compare_result.total_file_count += 1
    compare_result.total_size += source_size

    source_file = Path(entry.path)
    compare_file = compare_dir / source_file.relative_to(src_root)

    if not compare_file.exists():
        logger.warning('Source file %s not found in compare %s', source_file, compare_file)
        compare_result.src_file_new_count += 1
        return

    compare_size = compare_file.stat().st_size
    if source_size != compare_size:
        logger.warning(
            'Source file %s size (%i Bytes) differs from compare file %s size (%iBytes)',
            source_file,
            source_size,
            compare_file,
            compare_size,
        )
        compare_result.file_size_missmatch += 1
        return

    # Hashing is only reached for files of identical size:
    source_hash = hash_file(source_file)
    compare_hash = hash_file(compare_file)
    if source_hash != compare_hash:
        logger.warning(
            'Source file %s hash %r differs from compare file %s hash (%s)',
            source_file,
            source_hash,
            compare_file,
            compare_hash,
        )
        compare_result.file_hash_missmatch += 1
        return

    if source_size >= size_db.MIN_SIZE:
        # Big enough for deduplication -> should be known by both databases
        if source_size not in size_db:
            logger.warning(
                'Source file %s size (%i Bytes) not found in deduplication database',
                source_file,
                source_size,
            )
            compare_result.size_db_missing_count += 1

        if source_hash not in hash_db:
            logger.warning(
                'Source file %s hash %r not found in deduplication database',
                source_file,
                source_hash,
            )
            compare_result.hash_db_missing_count += 1
    else:
        # Small file -> Not in deduplication database
        compare_result.small_file_count += 1

    # Source and compare file are identical (missing database entries are only counted above)
    compare_result.successful_file_count += 1
118
+
119
+
120
def compare_tree(
    *,
    src_root: Path,
    backup_root: Path,
    excludes: tuple[str, ...],
    log_manager: LoggingManager,
) -> CompareResult:
    """
    Compare the source tree with its most recent backup below `backup_root`.

    The backups are expected as directories in `backup_root / src_root.name`
    whose names start with '20'; the last one in sorted order is used.
    A log file and a summary file, both prefixed with the current timestamp,
    are written into `backup_root / src_root.name`.

    Exits the process with status 1 if the source directory does not exist,
    `backup_root` has no `.phlb` directory, or no backup of the source
    directory can be found.

    Returns the collected CompareResult.
    """
    src_root = src_root.resolve()
    if not src_root.is_dir():
        print('Error: Source directory does not exist!')
        print(f'Please check source directory: "{src_root}"\n')
        sys.exit(1)

    backup_root = backup_root.resolve()
    phlb_conf_dir = backup_root / '.phlb'
    if not phlb_conf_dir.is_dir():
        print('Error: Compare directory seems to be wrong! (No .phlb configuration directory found)')
        print(f'Please check backup directory: "{backup_root}"\n')
        sys.exit(1)

    compare_main_dir = backup_root / src_root.name
    if not compare_main_dir.is_dir():
        # Without this check iterdir() below would raise FileNotFoundError.
        print('Error: No backup of the source directory found!')
        print(f'Please check backup directory: "{compare_main_dir}"\n')
        sys.exit(1)

    timestamps = sorted(
        path.name for path in compare_main_dir.iterdir() if path.is_dir() and path.name.startswith('20')
    )
    print(f'Found {len(timestamps)} compare(s) in {compare_main_dir}:')
    for timestamp in timestamps:
        print(f' * {timestamp}')
    if not timestamps:
        # Without this check timestamps[-1] below would raise IndexError.
        print('Error: No backup of the source directory found!')
        print(f'Please check backup directory: "{compare_main_dir}"\n')
        sys.exit(1)

    last_timestamp = timestamps[-1]
    compare_dir = compare_main_dir / last_timestamp
    print(f'\nComparing source tree {src_root} with {last_timestamp} compare:')
    print(f' {compare_dir}\n')

    now_timestamp = datetime.datetime.now().strftime('%Y-%m-%d-%H%M%S')
    log_file = compare_main_dir / f'{now_timestamp}-compare.log'
    log_manager.start_file_logging(log_file)

    exclude_names = set(excludes)
    with PrintTimingContextManager('Filesystem scan completed in'):
        src_file_count, src_total_size = humanized_fs_scan(src_root, excludes=exclude_names)

    with DisplayFileTreeProgress(src_file_count, src_total_size) as progress:
        # init "databases":
        size_db = FileSizeDatabase(phlb_conf_dir)
        hash_db = FileHashDatabase(backup_root, phlb_conf_dir)

        compare_result = CompareResult(compare_dir=compare_dir, log_file=log_file)

        next_update = 0
        for entry in iter_scandir_files(src_root, excludes=exclude_names):
            try:
                compare_one_file(
                    src_root=src_root,
                    entry=entry,
                    size_db=size_db,
                    hash_db=hash_db,
                    compare_dir=compare_dir,
                    compare_result=compare_result,
                )
            except Exception as err:
                # One broken file must not abort the whole compare: log it, count it, go on.
                logger.exception('Compare %s %s: %s', entry.path, err.__class__.__name__, err)
                compare_result.error_count += 1
            else:
                # Refresh the progress display at most every 0.5 seconds:
                now = time.monotonic()
                if now >= next_update:
                    progress.update(
                        completed_file_count=compare_result.total_file_count,
                        completed_size=compare_result.total_size,
                    )
                    next_update = now + 0.5

        # Finalize progress indicator values:
        progress.update(completed_file_count=compare_result.total_file_count, completed_size=compare_result.total_size)

    summary_file = compare_main_dir / f'{now_timestamp}-summary.txt'
    with TeeStdoutContext(summary_file):
        print(f'\nCompare complete: {compare_dir} (total size {human_filesize(compare_result.total_size)})\n')
        print(f' Total files processed: {compare_result.total_file_count}')
        print(f' * Successful compared files: {compare_result.successful_file_count}')
        print(f' * New source files: {compare_result.src_file_new_count}')
        print(f' * File size missmatch: {compare_result.file_size_missmatch}')
        print(f' * File hash missmatch: {compare_result.file_hash_missmatch}')

        print(f' * Small (<{size_db.MIN_SIZE} Bytes) files: {compare_result.small_file_count}')
        print(f' * Missing in size DB: {compare_result.size_db_missing_count}')
        print(f' * Missing in hash DB: {compare_result.hash_db_missing_count}')

        if compare_result.error_count > 0:
            print(f' Errors during compare: {compare_result.error_count} (see log for details)')
        print()

    logger.info('Compare completed. Summary created: %s', summary_file)

    return compare_result
@@ -0,0 +1,16 @@
1
+ from pathlib import Path
2
+
3
+ import PyHardLinkBackup
4
+
5
+
6
# Link to the project homepage (presumably appended to the CLI help output - verify against usage):
CLI_EPILOG = 'Project Homepage: https://github.com/jedie/PyHardLinkBackup'

# Directory that contains the "PyHardLinkBackup" package module:
BASE_PATH = Path(PyHardLinkBackup.__file__).parent
9
+
10
+
11
##########################################################################
# "Settings" for PyHardLinkBackup:

CHUNK_SIZE = 64 * 1024**2  # 64 MiB = 67_108_864 bytes
SMALL_FILE_THRESHOLD = 1_000  # bytes
HASH_ALGO = 'sha256'
@@ -0,0 +1,124 @@
1
+ import logging
2
+ import sys
3
+ from pathlib import Path
4
+ from typing import Annotated, Literal
5
+
6
+ import tyro
7
+ from bx_py_utils.path import assert_is_dir
8
+ from rich import (
9
+ get_console,
10
+ print, # noqa
11
+ )
12
+ from rich.logging import RichHandler
13
+
14
+
15
+ logger = logging.getLogger(__name__)
16
+
17
# The log level names that are accepted for console and file logging:
LogLevelLiteral = Literal['debug', 'info', 'warning', 'error']


# CLI argument type and default for the console log level:
TyroConsoleLogLevelArgType = Annotated[LogLevelLiteral, tyro.conf.arg(help='Log level for console logging.')]
DEFAULT_CONSOLE_LOG_LEVEL: TyroConsoleLogLevelArgType = 'warning'


# CLI argument type and default for the log file level:
TyroLogFileLevelArgType = Annotated[LogLevelLiteral, tyro.conf.arg(help='Log level for the log file')]
DEFAULT_LOG_FILE_LEVEL: TyroLogFileLevelArgType = 'info'
36
+
37
+
38
+ def log_level_name2int(level_name: str) -> int:
39
+ level_name = level_name.upper()
40
+ level_mapping = logging.getLevelNamesMapping()
41
+ try:
42
+ return level_mapping[level_name]
43
+ except KeyError as err:
44
+ raise ValueError(f'Invalid log level name: {level_name}') from err
45
+
46
+
47
+ console = get_console()
48
+
49
+
50
class LoggingManager:
    """
    Set up console logging via rich and, on request, an additional log file.

    Creating an instance reconfigures the root logger (`force=True`) with a
    RichHandler and installs `log_unhandled_exception` as `sys.excepthook`.
    """

    def __init__(
        self,
        *,
        console_level: TyroConsoleLogLevelArgType,
        file_level: TyroLogFileLevelArgType,
    ):
        """
        console_level / file_level are level names (e.g. 'debug'); an unknown
        name raises ValueError from log_level_name2int().
        """
        self.console_level_name = console_level
        self.console_level: int = log_level_name2int(console_level)
        self.file_level_name = file_level
        self.file_level: int = log_level_name2int(file_level)

        # The root logger has to let through everything that any handler wants to see:
        self.lowest_level = min(self.console_level, self.file_level)

        # Compare the numeric level here: `console_level` is the level *name*
        # (a string) and is never equal to the integer logging.DEBUG.
        if self.console_level == logging.DEBUG:
            log_format = '(%(name)s) %(message)s'
        else:
            log_format = '%(message)s'

        console.print(
            f'(Set [bold]console[/bold] log level: [cyan]{self.console_level_name}[/cyan])',
            justify='right',
        )
        handler = RichHandler(console=console, omit_repeated_times=False)
        handler.setLevel(self.console_level)
        logging.basicConfig(
            level=self.lowest_level,
            format=log_format,
            datefmt='[%x %X.%f]',
            handlers=[handler],
            force=True,
        )
        sys.excepthook = self.log_unhandled_exception

    def start_file_logging(self, log_file: Path):
        """
        Add a file handler for `log_file` (append mode, UTF-8) to the root logger.

        The parent directory of `log_file` is checked with assert_is_dir() first.
        """
        console.print(
            f'(initialize log file [bold]{log_file}[/bold] with level: [cyan]{self.file_level_name}[/cyan])',
            justify='right',
        )

        assert_is_dir(log_file.parent)

        root_logger = logging.getLogger()

        file_handler = logging.FileHandler(log_file, mode='a', encoding='utf-8')
        file_handler.setLevel(self.file_level)

        formatter = logging.Formatter(
            '%(asctime)s - %(name)s - %(levelname)s - %(message)s',
            datefmt='%Y-%m-%d %H:%M:%S',
        )
        file_handler.setFormatter(formatter)

        root_logger.addHandler(file_handler)

    def log_unhandled_exception(self, exc_type, exc_value, exc_traceback):
        """
        `sys.excepthook` replacement: send unhandled exceptions to the logging system.

        KeyboardInterrupt is only logged as info and then passed on to the
        default hook (`sys.__excepthook__`).
        """
        if issubclass(exc_type, KeyboardInterrupt):
            logger.info('Program interrupted by user (KeyboardInterrupt). Exiting...')
            sys.__excepthook__(exc_type, exc_value, exc_traceback)
        else:
            logger.exception(
                'Unhandled exception occurred:',
                exc_info=(exc_type, exc_value, exc_traceback),
            )
114
+
115
+
116
class NoopLoggingManager(LoggingManager):
    """
    Only for tests: A logging manager that does nothing.
    """

    def __init__(self, *args, **kwargs) -> None:
        # Accept any arguments, but skip LoggingManager.__init__() on purpose:
        # no attributes are set and the logging setup stays untouched.
        return None

    def start_file_logging(self, log_file: Path) -> None:
        # No log file handler is created.
        return None