PyHardLinkBackup 1.8.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- PyHardLinkBackup/__init__.py +7 -0
- PyHardLinkBackup/__main__.py +10 -0
- PyHardLinkBackup/backup.py +297 -0
- PyHardLinkBackup/cli_app/__init__.py +41 -0
- PyHardLinkBackup/cli_app/phlb.py +136 -0
- PyHardLinkBackup/cli_dev/__init__.py +70 -0
- PyHardLinkBackup/cli_dev/__main__.py +10 -0
- PyHardLinkBackup/cli_dev/benchmark.py +138 -0
- PyHardLinkBackup/cli_dev/code_style.py +12 -0
- PyHardLinkBackup/cli_dev/debugging.py +47 -0
- PyHardLinkBackup/cli_dev/packaging.py +62 -0
- PyHardLinkBackup/cli_dev/shell_completion.py +23 -0
- PyHardLinkBackup/cli_dev/testing.py +52 -0
- PyHardLinkBackup/cli_dev/update_readme_history.py +33 -0
- PyHardLinkBackup/compare_backup.py +259 -0
- PyHardLinkBackup/constants.py +18 -0
- PyHardLinkBackup/logging_setup.py +124 -0
- PyHardLinkBackup/rebuild_databases.py +217 -0
- PyHardLinkBackup/tests/__init__.py +36 -0
- PyHardLinkBackup/tests/test_backup.py +1167 -0
- PyHardLinkBackup/tests/test_compare_backup.py +167 -0
- PyHardLinkBackup/tests/test_doc_write.py +26 -0
- PyHardLinkBackup/tests/test_doctests.py +10 -0
- PyHardLinkBackup/tests/test_project_setup.py +46 -0
- PyHardLinkBackup/tests/test_readme.py +75 -0
- PyHardLinkBackup/tests/test_readme_history.py +9 -0
- PyHardLinkBackup/tests/test_rebuild_database.py +266 -0
- PyHardLinkBackup/utilities/__init__.py +0 -0
- PyHardLinkBackup/utilities/file_hash_database.py +62 -0
- PyHardLinkBackup/utilities/file_size_database.py +46 -0
- PyHardLinkBackup/utilities/filesystem.py +257 -0
- PyHardLinkBackup/utilities/humanize.py +39 -0
- PyHardLinkBackup/utilities/rich_utils.py +237 -0
- PyHardLinkBackup/utilities/sha256sums.py +61 -0
- PyHardLinkBackup/utilities/tee.py +40 -0
- PyHardLinkBackup/utilities/tests/__init__.py +0 -0
- PyHardLinkBackup/utilities/tests/test_file_hash_database.py +153 -0
- PyHardLinkBackup/utilities/tests/test_file_size_database.py +151 -0
- PyHardLinkBackup/utilities/tests/test_filesystem.py +167 -0
- PyHardLinkBackup/utilities/tests/unittest_utilities.py +78 -0
- PyHardLinkBackup/utilities/tyro_cli_shared_args.py +29 -0
- pyhardlinkbackup-1.8.1.dist-info/METADATA +700 -0
- pyhardlinkbackup-1.8.1.dist-info/RECORD +45 -0
- pyhardlinkbackup-1.8.1.dist-info/WHEEL +4 -0
- pyhardlinkbackup-1.8.1.dist-info/entry_points.txt +2 -0
|
@@ -0,0 +1,138 @@
|
|
|
1
|
+
import collections
|
|
2
|
+
import hashlib
|
|
3
|
+
import logging
|
|
4
|
+
import time
|
|
5
|
+
from pathlib import Path
|
|
6
|
+
|
|
7
|
+
from bx_py_utils.path import assert_is_dir
|
|
8
|
+
from cli_base.cli_tools.verbosity import setup_logging
|
|
9
|
+
from cli_base.tyro_commands import TyroVerbosityArgType
|
|
10
|
+
from rich import print # noqa
|
|
11
|
+
|
|
12
|
+
from PyHardLinkBackup.cli_dev import app
|
|
13
|
+
from PyHardLinkBackup.utilities.filesystem import humanized_fs_scan, iter_scandir_files
|
|
14
|
+
from PyHardLinkBackup.utilities.humanize import PrintTimingContextManager
|
|
15
|
+
from PyHardLinkBackup.utilities.tyro_cli_shared_args import DEFAULT_EXCLUDE_DIRECTORIES, TyroExcludeDirectoriesArgType
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
logger = logging.getLogger(__name__)
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
@app.command
def scan_benchmark(
    base_path: Path,
    /,
    excludes: TyroExcludeDirectoriesArgType = DEFAULT_EXCLUDE_DIRECTORIES,
    verbosity: TyroVerbosityArgType = 1,
) -> None:
    """
    Benchmark our filesystem scan routine.
    """
    setup_logging(verbosity=verbosity)
    exclude_set = set(excludes)
    # Time a full scan of the tree; the context manager prints the duration:
    with PrintTimingContextManager('Filesystem scan completed in'):
        humanized_fs_scan(path=base_path, excludes=exclude_set)
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
@app.command
def benchmark_hashes(
    base_path: Path,
    /,
    excludes: TyroExcludeDirectoriesArgType = DEFAULT_EXCLUDE_DIRECTORIES,
    max_duration: int = 30,  # in seconds
    min_file_size: int = 15 * 1024,  # 15 KiB
    max_file_size: int = 100 * 1024 * 1024,  # 100 MiB
    verbosity: TyroVerbosityArgType = 1,
) -> None:
    """
    Benchmark different file hashing algorithms on the given path.
    """
    # Example output:
    #
    # Total files hashed: 220, total size: 1187.7 MiB
    #
    # Results:
    # Total file content read time: 1.7817s
    #
    # sha1       | Total: 0.6827s | 0.4x hash/read
    # sha256     | Total: 0.7189s | 0.4x hash/read
    # ...
    # sha3_512   | Total: 4.5328s | 2.5x hash/read
    setup_logging(verbosity=verbosity)
    assert_is_dir(base_path)
    print(f'Benchmarking file hashes under: {base_path}')

    print(f'Min file size: {min_file_size} bytes')
    print(f'Max file size: {max_file_size} bytes')
    print(f'Max duration: {max_duration} seconds')

    algorithms = sorted(hashlib.algorithms_guaranteed)
    print(f'\nUsing {len(algorithms)} guaranteed algorithms: {algorithms}')
    print('-' * 80)

    file_count = 0
    total_size = 0
    total_read_time = 0.0
    # Bugfix: this was a defaultdict(set) — equal float durations were
    # silently deduplicated, under-counting the per-algorithm totals.
    # A list keeps every single measurement.
    results: dict[str, list[float]] = collections.defaultdict(list)

    start_time = time.time()
    stop_time = start_time + max_duration
    next_update = start_time + 2

    with PrintTimingContextManager('Filesystem scan completed in'):
        for dir_entry in iter_scandir_files(path=base_path, excludes=set(excludes)):
            entry_stat = dir_entry.stat()
            file_size = entry_stat.st_size
            if not (min_file_size <= file_size <= max_file_size):
                continue

            # Measure the raw read time separately from the hashing time.
            # (Bugfix: a dedicated name so the wall-clock `start_time` used
            # for the progress percentage is no longer shadowed.)
            read_start = time.perf_counter()
            file_content = Path(dir_entry.path).read_bytes()
            total_read_time += time.perf_counter() - read_start

            for algo in algorithms:
                # Actual measurement:
                hash_start = time.perf_counter()
                hashlib.new(algo, file_content)
                results[algo].append(time.perf_counter() - hash_start)

            file_count += 1
            total_size += file_size  # reuse the stat result from above

            now = time.time()
            if now >= stop_time:
                print('Reached max duration limit, stopping benchmark...')
                break

            if now >= next_update:
                percent = (now - start_time) / max_duration * 100
                print(
                    f'{int(percent)}% Processed {file_count} files so far,'
                    f' total size: {total_size / 1024 / 1024:.1f} MiB...'
                )
                next_update = now + 2

    print(f'\nTotal files hashed: {file_count}, total size: {total_size / 1024 / 1024:.1f} MiB')

    print('\nResults:')
    print(f'Total file content read time: {total_read_time:.4f}s\n')

    sorted_results = sorted(
        ((algo, sum(durations)) for algo, durations in results.items()),
        key=lambda x: x[1],  # Sort by total_duration
    )
    for algo, total_duration in sorted_results:
        # Robustness: avoid ZeroDivisionError when no file matched the size filter.
        ratio = total_duration / total_read_time if total_read_time else 0.0
        print(f'{algo:10} | Total: {total_duration:.4f}s | {ratio:.1f}x hash/read')
|
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
from cli_base.cli_tools.code_style import assert_code_style
|
|
2
|
+
from cli_base.tyro_commands import TyroVerbosityArgType
|
|
3
|
+
|
|
4
|
+
from PyHardLinkBackup.cli_dev import PACKAGE_ROOT, app
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
@app.command
def lint(verbosity: TyroVerbosityArgType = 1):
    """
    Check/fix code style by run: "ruff check --fix"
    """
    # Any non-zero verbosity level turns verbose output on:
    verbose = verbosity != 0
    assert_code_style(package_root=PACKAGE_ROOT, verbose=verbose, sys_exit=True)
|
|
@@ -0,0 +1,47 @@
|
|
|
1
|
+
import logging
|
|
2
|
+
from pathlib import Path
|
|
3
|
+
|
|
4
|
+
from cli_base.cli_tools.verbosity import setup_logging
|
|
5
|
+
from cli_base.tyro_commands import TyroVerbosityArgType
|
|
6
|
+
from rich import print # noqa
|
|
7
|
+
|
|
8
|
+
from PyHardLinkBackup.cli_dev import app
|
|
9
|
+
from PyHardLinkBackup.utilities.filesystem import iter_scandir_files, verbose_path_stat
|
|
10
|
+
from PyHardLinkBackup.utilities.humanize import PrintTimingContextManager
|
|
11
|
+
from PyHardLinkBackup.utilities.tyro_cli_shared_args import (
|
|
12
|
+
DEFAULT_EXCLUDE_DIRECTORIES,
|
|
13
|
+
TyroExcludeDirectoriesArgType,
|
|
14
|
+
TyroOneFileSystemArgType,
|
|
15
|
+
)
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
logger = logging.getLogger(__name__)
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
@app.command
def fs_info(
    path: Path,
    /,
    one_file_system: TyroOneFileSystemArgType = True,
    excludes: TyroExcludeDirectoriesArgType = DEFAULT_EXCLUDE_DIRECTORIES,
    verbosity: TyroVerbosityArgType = 2,
) -> None:
    """
    Display information about the filesystem under the given path.
    """
    setup_logging(verbosity=verbosity)

    # Device id of the start directory, used for --one-file-system filtering:
    root_stat = verbose_path_stat(path)

    with PrintTimingContextManager('Filesystem scan completed in'):
        file_iterator = iter_scandir_files(
            path=path,
            one_file_system=one_file_system,
            src_device_id=root_stat.st_dev,
            excludes=set(excludes),
        )
        for dir_entry in file_iterator:
            file_path = Path(dir_entry.path)
            stat_result = verbose_path_stat(file_path)
            print(f'Size: {stat_result.st_size} bytes, Inode: {stat_result.st_ino}, File: {file_path}')
|
|
@@ -0,0 +1,62 @@
|
|
|
1
|
+
import logging
|
|
2
|
+
|
|
3
|
+
from cli_base.cli_tools.dev_tools import run_unittest_cli
|
|
4
|
+
from cli_base.cli_tools.subprocess_utils import ToolsExecutor
|
|
5
|
+
from cli_base.cli_tools.verbosity import setup_logging
|
|
6
|
+
from cli_base.run_pip_audit import run_pip_audit
|
|
7
|
+
from cli_base.tyro_commands import TyroVerbosityArgType
|
|
8
|
+
from manageprojects.utilities.publish import publish_package
|
|
9
|
+
|
|
10
|
+
import PyHardLinkBackup
|
|
11
|
+
from PyHardLinkBackup.cli_dev import PACKAGE_ROOT, app
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
logger = logging.getLogger(__name__)
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
@app.command
def install():
    """
    Install requirements and 'PyHardLinkBackup' via pip as editable.
    """
    executor = ToolsExecutor(cwd=PACKAGE_ROOT)
    # First sync the locked dependencies, then add this package itself as editable:
    for command in (
        ('uv', 'sync'),
        ('uv', 'pip', 'install', '--no-deps', '-e', '.'),
    ):
        executor.verbose_check_call(*command)
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
@app.command
def pip_audit(verbosity: TyroVerbosityArgType):
    """
    Run pip-audit check against current requirements files
    """
    setup_logging(verbosity=verbosity)
    # Delegate to the shared cli_base helper:
    run_pip_audit(
        base_path=PACKAGE_ROOT,
        verbosity=verbosity,
    )
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
@app.command
def update(verbosity: TyroVerbosityArgType):
    """
    Update dependencies (uv.lock) and git pre-commit hooks
    """
    setup_logging(verbosity=verbosity)

    executor = ToolsExecutor(cwd=PACKAGE_ROOT)

    # Refresh the lock file to the newest compatible versions:
    executor.verbose_check_call('uv', 'lock', '--upgrade')

    # Audit the updated dependency set for known vulnerabilities:
    run_pip_audit(base_path=PACKAGE_ROOT, verbosity=verbosity)

    # Install new dependencies in current .venv:
    executor.verbose_check_call('uv', 'sync')

    # Update git pre-commit hooks:
    executor.verbose_check_call('pre-commit', 'autoupdate')
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
@app.command
def publish():
    """
    Build and upload this project to PyPi
    """
    # Run the test suite first: don't publish a broken state.
    run_unittest_cli(verbose=False, exit_after_run=False)

    publish_package(
        module=PyHardLinkBackup,
        package_path=PACKAGE_ROOT,
    )
|
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
import logging
|
|
2
|
+
|
|
3
|
+
from cli_base.cli_tools.shell_completion import setup_tyro_shell_completion
|
|
4
|
+
from cli_base.cli_tools.verbosity import setup_logging
|
|
5
|
+
from cli_base.tyro_commands import TyroVerbosityArgType
|
|
6
|
+
from rich import print # noqa
|
|
7
|
+
|
|
8
|
+
from PyHardLinkBackup.cli_dev import app
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
logger = logging.getLogger(__name__)
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
@app.command
def shell_completion(verbosity: TyroVerbosityArgType = 1, remove: bool = False) -> None:
    """
    Setup shell completion for this CLI (Currently only for bash shell)
    """
    setup_logging(verbosity=verbosity)
    # Install (or, with --remove, uninstall) the completion for this dev CLI:
    setup_tyro_shell_completion(prog_name='PyHardLinkBackup_dev_cli', remove=remove)
|
|
@@ -0,0 +1,52 @@
|
|
|
1
|
+
from cli_base.cli_tools.dev_tools import run_coverage, run_nox, run_unittest_cli
|
|
2
|
+
from cli_base.cli_tools.subprocess_utils import verbose_check_call
|
|
3
|
+
from cli_base.cli_tools.test_utils.snapshot import UpdateTestSnapshotFiles
|
|
4
|
+
from cli_base.tyro_commands import TyroVerbosityArgType
|
|
5
|
+
|
|
6
|
+
from PyHardLinkBackup.cli_dev import PACKAGE_ROOT, app
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
@app.command
def mypy(verbosity: TyroVerbosityArgType):
    """Run Mypy (configured in pyproject.toml)"""
    verbose = verbosity > 0
    verbose_check_call('mypy', '.', cwd=PACKAGE_ROOT, verbose=verbose, exit_on_error=True)
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
@app.command
def update_test_snapshot_files(verbosity: TyroVerbosityArgType):
    """
    Update all test snapshot files (by remove and recreate all snapshot files)
    """
    verbose = verbosity > 0
    with UpdateTestSnapshotFiles(root_path=PACKAGE_ROOT, verbose=verbose):
        # Just recreate them by running tests:
        run_unittest_cli(
            extra_env={
                'RAISE_SNAPSHOT_ERRORS': '0',  # Recreate snapshot files without error
            },
            verbose=verbosity > 1,
            exit_after_run=False,
        )
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
@app.command  # Dummy command
def test():
    """
    Run unittests
    """
    # Delegates to cli_base's unittest runner; it exits with the test result.
    run_unittest_cli()
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
@app.command  # Dummy command
def coverage():
    """
    Run tests and show coverage report.
    """
    # Delegates to cli_base's coverage runner.
    run_coverage()
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
@app.command # Dummy "nox" command
|
|
48
|
+
def nox():
|
|
49
|
+
"""
|
|
50
|
+
Run nox
|
|
51
|
+
"""
|
|
52
|
+
run_nox()
|
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
import logging
|
|
2
|
+
import sys
|
|
3
|
+
from pathlib import Path
|
|
4
|
+
|
|
5
|
+
from cli_base.cli_tools import git_history
|
|
6
|
+
from cli_base.cli_tools.verbosity import setup_logging
|
|
7
|
+
from cli_base.tyro_commands import TyroVerbosityArgType
|
|
8
|
+
from rich import print # noqa
|
|
9
|
+
|
|
10
|
+
from PyHardLinkBackup.cli_dev import app
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
logger = logging.getLogger(__name__)
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
@app.command
def update_readme_history(verbosity: TyroVerbosityArgType):
    """
    Update project history base on git commits/tags in README.md

    Will be exited with 1 if the README.md was updated otherwise with 0.

    Also, callable via e.g.:
        python -m cli_base update-readme-history -v
    """
    setup_logging(verbosity=verbosity)

    logger.debug('%s called. CWD: %s', __name__, Path.cwd())
    was_updated = git_history.update_readme_history(verbosity=verbosity)
    # Exit code 1 signals "README changed" (useful for pre-commit hooks):
    exit_code = int(bool(was_updated))
    if verbosity:
        print(f'{exit_code=}')
    sys.exit(exit_code)
|
|
@@ -0,0 +1,259 @@
|
|
|
1
|
+
import dataclasses
|
|
2
|
+
import datetime
|
|
3
|
+
import logging
|
|
4
|
+
import os
|
|
5
|
+
import sys
|
|
6
|
+
import time
|
|
7
|
+
from pathlib import Path
|
|
8
|
+
|
|
9
|
+
from rich import print # noqa
|
|
10
|
+
|
|
11
|
+
from PyHardLinkBackup.logging_setup import LoggingManager
|
|
12
|
+
from PyHardLinkBackup.utilities.file_hash_database import FileHashDatabase
|
|
13
|
+
from PyHardLinkBackup.utilities.file_size_database import FileSizeDatabase
|
|
14
|
+
from PyHardLinkBackup.utilities.filesystem import (
|
|
15
|
+
hash_file,
|
|
16
|
+
humanized_fs_scan,
|
|
17
|
+
iter_scandir_files,
|
|
18
|
+
verbose_path_stat,
|
|
19
|
+
)
|
|
20
|
+
from PyHardLinkBackup.utilities.humanize import PrintTimingContextManager, human_filesize
|
|
21
|
+
from PyHardLinkBackup.utilities.rich_utils import DisplayFileTreeProgress
|
|
22
|
+
from PyHardLinkBackup.utilities.tee import TeeStdoutContext
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
logger = logging.getLogger(__name__)
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
@dataclasses.dataclass
class CompareResult:
    """Counters and metadata collected while comparing a source tree with a backup run."""

    # Name of the newest backup timestamp directory that was compared against.
    last_timestamp: str
    # Path of that backup run directory.
    compare_dir: Path
    # Log file created for this compare run.
    log_file: Path
    #
    # Overall progress counters (fed into the progress bars):
    total_file_count: int = 0
    total_size: int = 0
    #
    # Mismatch counters:
    src_file_new_count: int = 0  # source file has no counterpart in the backup
    file_size_missmatch: int = 0
    file_hash_missmatch: int = 0
    #
    # Deduplication-database bookkeeping:
    small_file_count: int = 0  # files below the size-DB minimum, never in the DB
    size_db_missing_count: int = 0
    hash_db_missing_count: int = 0
    #
    successful_file_count: int = 0
    error_count: int = 0  # unexpected exceptions / symlink target mismatches
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
def compare_one_file(
    *,
    src_root: Path,
    entry: os.DirEntry,
    size_db: FileSizeDatabase,
    hash_db: FileHashDatabase,
    compare_dir: Path,
    compare_result: CompareResult,
    progress: DisplayFileTreeProgress,
) -> None:
    """
    Compare one scandir entry from the source tree against its counterpart
    below *compare_dir*, updating the counters in *compare_result*.
    """
    if entry.is_file():
        # For the progress bars:
        compare_result.total_file_count += 1

    try:
        src_size = entry.stat().st_size
    except FileNotFoundError as err:
        # entry.stat() follows symlinks -> a dangling symlink raises here.
        logger.warning(f'Broken symlink {entry.path}: {err.__class__.__name__}: {err}')
        return

    # For the progress bars:
    compare_result.total_size += src_size

    src_path = Path(entry.path)

    # Counterpart path inside the backup run directory:
    dst_path = compare_dir / src_path.relative_to(src_root)

    if not dst_path.exists():
        logger.warning('Source file %s not found in compare %s', src_path, dst_path)
        compare_result.src_file_new_count += 1
        return

    if src_path.is_dir():
        # Only symlinks that point to a directory are expected here,
        # because the iterator yields files (and symlinks) only.
        if not src_path.is_symlink():
            raise RuntimeError(f'Internal error - Directory found: {src_path=}')

        # compare directory symlink targets:
        src_target = src_path.readlink()
        dst_target = dst_path.readlink()
        if src_target != dst_target:
            logger.warning(
                'Source directory symlink %s target %s differs from compare symlink %s target %s',
                src_path,
                src_target,
                dst_path,
                dst_target,
            )
            compare_result.error_count += 1
        # NOTE(review): return placed at branch level so a matching dir
        # symlink is not size/hash compared as a file — confirm against upstream.
        return

    dst_size = dst_path.stat().st_size
    if src_size != dst_size:
        logger.warning(
            'Source file %s size (%i Bytes) differs from compare file %s size (%iBytes)',
            src_path,
            src_size,
            dst_path,
            dst_size,
        )
        compare_result.file_size_missmatch += 1
        return

    # Same size -> compare the file content by hash:
    src_hash = hash_file(src_path, progress=progress, total_size=src_size)
    dst_hash = hash_file(dst_path, progress=progress, total_size=dst_size)

    if src_hash != dst_hash:
        logger.warning(
            'Source file %s hash %r differs from compare file %s hash (%s)',
            src_path,
            src_hash,
            dst_path,
            dst_hash,
        )
        compare_result.file_hash_missmatch += 1
        return

    if src_size < size_db.MIN_SIZE:
        # Small file -> Not in deduplication database
        compare_result.small_file_count += 1
    else:
        # Cross-check both deduplication "databases" (these are only
        # warnings, the file itself compared equal):
        if src_size not in size_db:
            logger.warning(
                'Source file %s size (%i Bytes) not found in deduplication database',
                src_path,
                src_size,
            )
            compare_result.size_db_missing_count += 1

        if src_hash not in hash_db:
            logger.warning(
                'Source file %s hash %r not found in deduplication database',
                src_path,
                src_hash,
            )
            compare_result.hash_db_missing_count += 1

    # Everything is ok
    compare_result.successful_file_count += 1
|
|
147
|
+
|
|
148
|
+
|
|
149
|
+
def compare_tree(
    *,
    src_root: Path,
    backup_root: Path,
    one_file_system: bool,
    excludes: tuple[str, ...],
    log_manager: LoggingManager,
) -> CompareResult:
    """
    Compare the source tree *src_root* with the newest backup run below
    *backup_root* and return the collected counters.

    Exits with code 1 if the source or backup directory is invalid.
    """
    src_root = src_root.resolve()
    if not src_root.is_dir():
        print('Error: Source directory does not exist!')
        print(f'Please check source directory: "{src_root}"\n')
        sys.exit(1)

    backup_root = backup_root.resolve()
    # A valid backup root must contain the .phlb configuration directory:
    phlb_conf_dir = backup_root / '.phlb'
    if not phlb_conf_dir.is_dir():
        print('Error: Compare directory seems to be wrong! (No .phlb configuration directory found)')
        print(f'Please check backup directory: "{backup_root}"\n')
        sys.exit(1)

    # Backup runs live in <backup_root>/<src name>/<timestamp>;
    # the startswith('20') filter matches year-prefixed timestamp dirs.
    compare_main_dir = backup_root / src_root.name
    timestamps = sorted(
        path.name for path in compare_main_dir.iterdir() if path.is_dir() and path.name.startswith('20')
    )
    print(f'Found {len(timestamps)} compare(s) in {compare_main_dir}:')
    for timestamp in timestamps:
        print(f' * {timestamp}')
    # Sorted timestamp names -> the last one is the newest backup run:
    last_timestamp = timestamps[-1]
    compare_dir = compare_main_dir / last_timestamp
    print(f'\nComparing source tree {src_root} with {last_timestamp} compare:')
    print(f'  {compare_dir}\n')

    now_timestamp = datetime.datetime.now().strftime('%Y-%m-%d-%H%M%S')
    log_file = compare_main_dir / f'{now_timestamp}-compare.log'
    log_manager.start_file_logging(log_file)

    # Device id of the source root, needed for one_file_system filtering:
    src_device_id = verbose_path_stat(src_root).st_dev

    excludes: set = set(excludes)
    # Pre-scan to get total counts/sizes for the progress display:
    with PrintTimingContextManager('Filesystem scan completed in'):
        src_file_count, src_total_size = humanized_fs_scan(
            path=src_root,
            one_file_system=one_file_system,
            src_device_id=src_device_id,
            excludes=excludes,
        )

    with DisplayFileTreeProgress(
        description=f'Compare {src_root}...',
        total_file_count=src_file_count,
        total_size=src_total_size,
    ) as progress:
        # init "databases":
        size_db = FileSizeDatabase(phlb_conf_dir)
        hash_db = FileHashDatabase(backup_root, phlb_conf_dir)

        compare_result = CompareResult(last_timestamp=last_timestamp, compare_dir=compare_dir, log_file=log_file)

        next_update = 0
        for entry in iter_scandir_files(
            path=src_root,
            one_file_system=one_file_system,
            src_device_id=src_device_id,
            excludes=excludes,
        ):
            try:
                compare_one_file(
                    src_root=src_root,
                    entry=entry,
                    size_db=size_db,
                    hash_db=hash_db,
                    compare_dir=compare_dir,
                    compare_result=compare_result,
                    progress=progress,
                )
            except Exception as err:
                # Keep going on unexpected errors; count them for the summary.
                logger.exception(f'Compare {entry.path} {err.__class__.__name__}: {err}')
                compare_result.error_count += 1
            else:
                # Throttle progress updates to one per 0.5s:
                now = time.monotonic()
                if now >= next_update:
                    progress.update(
                        completed_file_count=compare_result.total_file_count,
                        advance_size=compare_result.total_size,
                    )
                    next_update = now + 0.5

        # Finalize progress indicator values:
        progress.update(completed_file_count=compare_result.total_file_count, advance_size=compare_result.total_size)

    # Write the summary both to stdout and a summary file:
    summary_file = compare_main_dir / f'{now_timestamp}-summary.txt'
    with TeeStdoutContext(summary_file):
        print(f'\nCompare complete: {compare_dir} (total size {human_filesize(compare_result.total_size)})\n')
        print(f'  Total files processed: {compare_result.total_file_count}')
        print(f'   * Successful compared files: {compare_result.successful_file_count}')
        print(f'   * New source files: {compare_result.src_file_new_count}')
        print(f'   * File size missmatch: {compare_result.file_size_missmatch}')
        print(f'   * File hash missmatch: {compare_result.file_hash_missmatch}')

        print(f'   * Small (<{size_db.MIN_SIZE} Bytes) files: {compare_result.small_file_count}')
        print(f'   * Missing in size DB: {compare_result.size_db_missing_count}')
        print(f'   * Missing in hash DB: {compare_result.hash_db_missing_count}')

        if compare_result.error_count > 0:
            print(f'  Errors during compare: {compare_result.error_count} (see log for details)')
        print()

    logger.info('Compare completed. Summary created: %s', summary_file)

    return compare_result
|
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
from pathlib import Path
|
|
2
|
+
|
|
3
|
+
import PyHardLinkBackup
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
CLI_EPILOG = 'Project Homepage: https://github.com/jedie/PyHardLinkBackup'

# Installed location of the PyHardLinkBackup package:
BASE_PATH = Path(PyHardLinkBackup.__file__).parent


##########################################################################
# "Settings" for PyHardLinkBackup:

# Hash algorithm used for file content hashing:
HASH_ALGO = 'sha256'
# Files below this size are presumably skipped by the deduplication
# databases — TODO confirm against file_size_database usage.
SMALL_FILE_THRESHOLD = 1000  # bytes
CHUNK_SIZE = 64 * 1024 * 1024  # 64 MiB
# NOTE(review): name looks like a typo for "LARGE_FILE_PROGRESS_MIN_SIZE" —
# renaming would break importers, so it is only flagged here.
LAGE_FILE_PROGRESS_MIN_SIZE = CHUNK_SIZE * 3
|
|
18
|
+
|