PyHardLinkBackup 1.4.1__tar.gz → 1.5.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {pyhardlinkbackup-1.4.1 → pyhardlinkbackup-1.5.0}/PKG-INFO +8 -5
- {pyhardlinkbackup-1.4.1 → pyhardlinkbackup-1.5.0}/PyHardLinkBackup/__init__.py +1 -1
- {pyhardlinkbackup-1.4.1 → pyhardlinkbackup-1.5.0}/PyHardLinkBackup/cli_app/phlb.py +37 -1
- pyhardlinkbackup-1.5.0/PyHardLinkBackup/compare_backup.py +212 -0
- pyhardlinkbackup-1.5.0/PyHardLinkBackup/tests/test_compare_backup.py +86 -0
- {pyhardlinkbackup-1.4.1 → pyhardlinkbackup-1.5.0}/README.md +7 -4
- {pyhardlinkbackup-1.4.1 → pyhardlinkbackup-1.5.0}/.editorconfig +0 -0
- {pyhardlinkbackup-1.4.1 → pyhardlinkbackup-1.5.0}/.github/workflows/tests.yml +0 -0
- {pyhardlinkbackup-1.4.1 → pyhardlinkbackup-1.5.0}/.gitignore +0 -0
- {pyhardlinkbackup-1.4.1 → pyhardlinkbackup-1.5.0}/.idea/.gitignore +0 -0
- {pyhardlinkbackup-1.4.1 → pyhardlinkbackup-1.5.0}/.pre-commit-config.yaml +0 -0
- {pyhardlinkbackup-1.4.1 → pyhardlinkbackup-1.5.0}/.pre-commit-hooks.yaml +0 -0
- {pyhardlinkbackup-1.4.1 → pyhardlinkbackup-1.5.0}/.run/Template Python tests.run.xml +0 -0
- {pyhardlinkbackup-1.4.1 → pyhardlinkbackup-1.5.0}/.run/Unittests - __all__.run.xml +0 -0
- {pyhardlinkbackup-1.4.1 → pyhardlinkbackup-1.5.0}/.run/cli.py --help.run.xml +0 -0
- {pyhardlinkbackup-1.4.1 → pyhardlinkbackup-1.5.0}/.run/dev-cli update.run.xml +0 -0
- {pyhardlinkbackup-1.4.1 → pyhardlinkbackup-1.5.0}/.run/only DocTests.run.xml +0 -0
- {pyhardlinkbackup-1.4.1 → pyhardlinkbackup-1.5.0}/.run/only DocWrite.run.xml +0 -0
- {pyhardlinkbackup-1.4.1 → pyhardlinkbackup-1.5.0}/.venv-app/lib/python3.12/site-packages/cli_base/tests/shell_complete_snapshots/.gitignore +0 -0
- {pyhardlinkbackup-1.4.1 → pyhardlinkbackup-1.5.0}/PyHardLinkBackup/__main__.py +0 -0
- {pyhardlinkbackup-1.4.1 → pyhardlinkbackup-1.5.0}/PyHardLinkBackup/backup.py +0 -0
- {pyhardlinkbackup-1.4.1 → pyhardlinkbackup-1.5.0}/PyHardLinkBackup/cli_app/__init__.py +0 -0
- {pyhardlinkbackup-1.4.1 → pyhardlinkbackup-1.5.0}/PyHardLinkBackup/cli_dev/__init__.py +0 -0
- {pyhardlinkbackup-1.4.1 → pyhardlinkbackup-1.5.0}/PyHardLinkBackup/cli_dev/benchmark.py +0 -0
- {pyhardlinkbackup-1.4.1 → pyhardlinkbackup-1.5.0}/PyHardLinkBackup/cli_dev/code_style.py +0 -0
- {pyhardlinkbackup-1.4.1 → pyhardlinkbackup-1.5.0}/PyHardLinkBackup/cli_dev/packaging.py +0 -0
- {pyhardlinkbackup-1.4.1 → pyhardlinkbackup-1.5.0}/PyHardLinkBackup/cli_dev/shell_completion.py +0 -0
- {pyhardlinkbackup-1.4.1 → pyhardlinkbackup-1.5.0}/PyHardLinkBackup/cli_dev/testing.py +0 -0
- {pyhardlinkbackup-1.4.1 → pyhardlinkbackup-1.5.0}/PyHardLinkBackup/cli_dev/update_readme_history.py +0 -0
- {pyhardlinkbackup-1.4.1 → pyhardlinkbackup-1.5.0}/PyHardLinkBackup/constants.py +0 -0
- {pyhardlinkbackup-1.4.1 → pyhardlinkbackup-1.5.0}/PyHardLinkBackup/logging_setup.py +0 -0
- {pyhardlinkbackup-1.4.1 → pyhardlinkbackup-1.5.0}/PyHardLinkBackup/rebuild_databases.py +0 -0
- {pyhardlinkbackup-1.4.1 → pyhardlinkbackup-1.5.0}/PyHardLinkBackup/tests/__init__.py +0 -0
- {pyhardlinkbackup-1.4.1 → pyhardlinkbackup-1.5.0}/PyHardLinkBackup/tests/test_backup.py +0 -0
- {pyhardlinkbackup-1.4.1 → pyhardlinkbackup-1.5.0}/PyHardLinkBackup/tests/test_doc_write.py +0 -0
- {pyhardlinkbackup-1.4.1 → pyhardlinkbackup-1.5.0}/PyHardLinkBackup/tests/test_doctests.py +0 -0
- {pyhardlinkbackup-1.4.1 → pyhardlinkbackup-1.5.0}/PyHardLinkBackup/tests/test_project_setup.py +0 -0
- {pyhardlinkbackup-1.4.1 → pyhardlinkbackup-1.5.0}/PyHardLinkBackup/tests/test_readme.py +0 -0
- {pyhardlinkbackup-1.4.1 → pyhardlinkbackup-1.5.0}/PyHardLinkBackup/tests/test_readme_history.py +0 -0
- {pyhardlinkbackup-1.4.1 → pyhardlinkbackup-1.5.0}/PyHardLinkBackup/tests/test_rebuild_database.py +0 -0
- {pyhardlinkbackup-1.4.1 → pyhardlinkbackup-1.5.0}/PyHardLinkBackup/utilities/__init__.py +0 -0
- {pyhardlinkbackup-1.4.1 → pyhardlinkbackup-1.5.0}/PyHardLinkBackup/utilities/file_hash_database.py +0 -0
- {pyhardlinkbackup-1.4.1 → pyhardlinkbackup-1.5.0}/PyHardLinkBackup/utilities/file_size_database.py +0 -0
- {pyhardlinkbackup-1.4.1 → pyhardlinkbackup-1.5.0}/PyHardLinkBackup/utilities/filesystem.py +0 -0
- {pyhardlinkbackup-1.4.1 → pyhardlinkbackup-1.5.0}/PyHardLinkBackup/utilities/humanize.py +0 -0
- {pyhardlinkbackup-1.4.1 → pyhardlinkbackup-1.5.0}/PyHardLinkBackup/utilities/rich_utils.py +0 -0
- {pyhardlinkbackup-1.4.1 → pyhardlinkbackup-1.5.0}/PyHardLinkBackup/utilities/sha256sums.py +0 -0
- {pyhardlinkbackup-1.4.1 → pyhardlinkbackup-1.5.0}/PyHardLinkBackup/utilities/tee.py +0 -0
- {pyhardlinkbackup-1.4.1 → pyhardlinkbackup-1.5.0}/PyHardLinkBackup/utilities/tests/__init__.py +0 -0
- {pyhardlinkbackup-1.4.1 → pyhardlinkbackup-1.5.0}/PyHardLinkBackup/utilities/tests/test_file_hash_database.py +0 -0
- {pyhardlinkbackup-1.4.1 → pyhardlinkbackup-1.5.0}/PyHardLinkBackup/utilities/tests/test_file_size_database.py +0 -0
- {pyhardlinkbackup-1.4.1 → pyhardlinkbackup-1.5.0}/PyHardLinkBackup/utilities/tests/test_filesystem.py +0 -0
- {pyhardlinkbackup-1.4.1 → pyhardlinkbackup-1.5.0}/PyHardLinkBackup/utilities/tyro_cli_shared_args.py +0 -0
- {pyhardlinkbackup-1.4.1 → pyhardlinkbackup-1.5.0}/cli.py +0 -0
- {pyhardlinkbackup-1.4.1 → pyhardlinkbackup-1.5.0}/dev-cli.py +0 -0
- {pyhardlinkbackup-1.4.1 → pyhardlinkbackup-1.5.0}/dist/.gitignore +0 -0
- {pyhardlinkbackup-1.4.1 → pyhardlinkbackup-1.5.0}/docs/README.md +0 -0
- {pyhardlinkbackup-1.4.1 → pyhardlinkbackup-1.5.0}/docs/about-docs.md +0 -0
- {pyhardlinkbackup-1.4.1 → pyhardlinkbackup-1.5.0}/noxfile.py +0 -0
- {pyhardlinkbackup-1.4.1 → pyhardlinkbackup-1.5.0}/pyproject.toml +0 -0
- {pyhardlinkbackup-1.4.1 → pyhardlinkbackup-1.5.0}/uv.lock +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: PyHardLinkBackup
|
|
3
|
-
Version: 1.4.1
|
|
3
|
+
Version: 1.5.0
|
|
4
4
|
Summary: HardLink/Deduplication Backups with Python
|
|
5
5
|
Project-URL: Documentation, https://github.com/jedie/PyHardLinkBackup
|
|
6
6
|
Project-URL: Source, https://github.com/jedie/PyHardLinkBackup
|
|
@@ -80,7 +80,7 @@ complete help for main CLI app:
|
|
|
80
80
|
|
|
81
81
|
[comment]: <> (✂✂✂ auto generated main help start ✂✂✂)
|
|
82
82
|
```
|
|
83
|
-
usage: phlb [-h] {backup,rebuild,version}
|
|
83
|
+
usage: phlb [-h] {backup,compare,rebuild,version}
|
|
84
84
|
|
|
85
85
|
|
|
86
86
|
|
|
@@ -90,6 +90,7 @@ usage: phlb [-h] {backup,rebuild,version}
|
|
|
90
90
|
╭─ subcommands ────────────────────────────────────────────────────────────────────────────────────────────────────────╮
|
|
91
91
|
│ (required) │
|
|
92
92
|
│ • backup Backup the source directory to the destination directory using hard links for deduplication. │
|
|
93
|
+
│ • compare Compares a source tree with the last backup and validates all known file hashes. │
|
|
93
94
|
│ • rebuild Rebuild the file hash and size database by scanning all backup files. And also verify SHA256SUMS and/or │
|
|
94
95
|
│ store missing hashes in SHA256SUMS files. │
|
|
95
96
|
│ • version Print version and exit │
|
|
@@ -231,6 +232,8 @@ Overview of main changes:
|
|
|
231
232
|
|
|
232
233
|
[comment]: <> (✂✂✂ auto generated history start ✂✂✂)
|
|
233
234
|
|
|
235
|
+
* [v1.5.0](https://github.com/jedie/PyHardLinkBackup/compare/v1.4.1...v1.5.0)
|
|
236
|
+
* 2026-01-17 - NEW: Compare command to verify source tree with last backup
|
|
234
237
|
* [v1.4.1](https://github.com/jedie/PyHardLinkBackup/compare/v1.4.0...v1.4.1)
|
|
235
238
|
* 2026-01-16 - Bugfix large file handling
|
|
236
239
|
* [v1.4.0](https://github.com/jedie/PyHardLinkBackup/compare/v1.3.0...v1.4.0)
|
|
@@ -243,15 +246,15 @@ Overview of main changes:
|
|
|
243
246
|
* 2026-01-15 - Add tests for rebuild
|
|
244
247
|
* 2026-01-15 - Add command to "rebuld" the size and hash filesystem database
|
|
245
248
|
* 2026-01-15 - Add screenshots in the README
|
|
249
|
+
|
|
250
|
+
<details><summary>Expand older history entries ...</summary>
|
|
251
|
+
|
|
246
252
|
* [v1.2.0](https://github.com/jedie/PyHardLinkBackup/compare/v1.1.0...v1.2.0)
|
|
247
253
|
* 2026-01-15 - Add error handling: Log exception but continue with the backup
|
|
248
254
|
* 2026-01-15 - Check permission and hadlink support on destination path
|
|
249
255
|
* 2026-01-14 - Enhance progress bars
|
|
250
256
|
* 2026-01-14 - A a note to rsync --link-dest
|
|
251
257
|
* 2026-01-14 - Use cli_base.cli_tools.test_utils.base_testcases
|
|
252
|
-
|
|
253
|
-
<details><summary>Expand older history entries ...</summary>
|
|
254
|
-
|
|
255
258
|
* [v1.1.0](https://github.com/jedie/PyHardLinkBackup/compare/v1.0.1...v1.1.0)
|
|
256
259
|
* 2026-01-14 - Change backup timestamp directory to old schema: '%Y-%m-%d-%H%M%S'
|
|
257
260
|
* 2026-01-14 - Add "Overview of main changes" to README
|
|
@@ -5,7 +5,7 @@ from typing import Annotated
|
|
|
5
5
|
import tyro
|
|
6
6
|
from rich import print # noqa
|
|
7
7
|
|
|
8
|
-
from PyHardLinkBackup import rebuild_databases
|
|
8
|
+
from PyHardLinkBackup import compare_backup, rebuild_databases
|
|
9
9
|
from PyHardLinkBackup.backup import backup_tree
|
|
10
10
|
from PyHardLinkBackup.cli_app import app
|
|
11
11
|
from PyHardLinkBackup.logging_setup import (
|
|
@@ -60,6 +60,42 @@ def backup(
|
|
|
60
60
|
)
|
|
61
61
|
|
|
62
62
|
|
|
63
|
+
@app.command
def compare(
    src: Annotated[
        Path,
        tyro.conf.arg(
            metavar='source',
            help='Source directory that should be compared with the last backup.',
        ),
    ],
    dst: Annotated[
        Path,
        tyro.conf.arg(
            metavar='destination',
            help='Destination directory with the backups. Will pick the last backup for comparison.',
        ),
    ],
    /,
    excludes: TyroExcludeDirectoriesArgType = DEFAULT_EXCLUDE_DIRECTORIES,
    verbosity: TyroConsoleLogLevelArgType = DEFAULT_CONSOLE_LOG_LEVEL,
    log_file_level: TyroLogFileLevelArgType = DEFAULT_LOG_FILE_LEVEL,
) -> None:
    """
    Compares a source tree with the last backup and validates all known file hashes.
    """
    # NOTE: The docstring above is used as the help text of the "compare" sub-command,
    # so it is kept to a single sentence.
    compare_backup.compare_tree(
        src_root=src,
        backup_root=dst,
        excludes=excludes,
        # Only the log levels are set here; compare_tree() starts the file logging itself.
        log_manager=LoggingManager(
            console_level=verbosity,
            file_level=log_file_level,
        ),
    )
|
|
97
|
+
|
|
98
|
+
|
|
63
99
|
@app.command
|
|
64
100
|
def rebuild(
|
|
65
101
|
backup_root: Annotated[
|
|
@@ -0,0 +1,212 @@
|
|
|
1
|
+
import dataclasses
|
|
2
|
+
import datetime
|
|
3
|
+
import logging
|
|
4
|
+
import os
|
|
5
|
+
import sys
|
|
6
|
+
import time
|
|
7
|
+
from pathlib import Path
|
|
8
|
+
|
|
9
|
+
from rich import print # noqa
|
|
10
|
+
|
|
11
|
+
from PyHardLinkBackup.logging_setup import LoggingManager
|
|
12
|
+
from PyHardLinkBackup.utilities.file_hash_database import FileHashDatabase
|
|
13
|
+
from PyHardLinkBackup.utilities.file_size_database import FileSizeDatabase
|
|
14
|
+
from PyHardLinkBackup.utilities.filesystem import (
|
|
15
|
+
hash_file,
|
|
16
|
+
humanized_fs_scan,
|
|
17
|
+
iter_scandir_files,
|
|
18
|
+
)
|
|
19
|
+
from PyHardLinkBackup.utilities.humanize import PrintTimingContextManager, human_filesize
|
|
20
|
+
from PyHardLinkBackup.utilities.rich_utils import DisplayFileTreeProgress
|
|
21
|
+
from PyHardLinkBackup.utilities.tee import TeeStdoutContext
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
logger = logging.getLogger(__name__)
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
@dataclasses.dataclass
class CompareResult:
    """
    Paths and counters collected while a source tree is compared with a backup.

    All counters start at zero and are incremented per source file.
    """

    # The backup directory the source tree is compared with, and the log file of the run:
    compare_dir: Path
    log_file: Path

    # All visited source files and the sum of their sizes in Bytes:
    total_file_count: int = 0
    total_size: int = 0

    # Differences between source and backup:
    src_file_new_count: int = 0  # Source file does not exist in the backup
    file_size_missmatch: int = 0  # Source and backup file sizes differ
    file_hash_missmatch: int = 0  # Same size, but the file hashes differ

    # Deduplication database checks (only done if source and backup file are identical):
    small_file_count: int = 0  # Smaller than the size database MIN_SIZE
    size_db_missing_count: int = 0  # File size not found in the size database
    hash_db_missing_count: int = 0  # File hash not found in the hash database

    # Files that passed the size and hash comparison / files that raised an exception:
    successful_file_count: int = 0
    error_count: int = 0
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
def compare_one_file(
    *,
    src_root: Path,
    entry: os.DirEntry,
    size_db: FileSizeDatabase,
    hash_db: FileHashDatabase,
    compare_dir: Path,
    compare_result: CompareResult,
) -> None:
    """
    Compare one source file with its counterpart in the backup and update the counters.

    The checks run in this order and the first failing one ends the comparison:

    1. The file must exist in ``compare_dir`` (same path relative to ``src_root``).
    2. Source and backup file must have the same size.
    3. Source and backup file must have the same hash.

    Files that pass all three checks are counted as successful. For these files
    the deduplication databases are checked as well: a missing size or hash
    entry is logged and counted, but the file still counts as successful.

    :param src_root: Root directory of the source tree.
    :param entry: The source file to compare.
    :param size_db: Size database; files smaller than ``size_db.MIN_SIZE`` are not looked up.
    :param hash_db: Hash database.
    :param compare_dir: Root directory of the backup to compare with.
    :param compare_result: Counters, updated in place.
    """
    src_size = entry.stat().st_size

    # Counted up front, so the totals (used for the progress bars) also
    # include files that fail one of the checks below:
    compare_result.total_file_count += 1
    compare_result.total_size += src_size

    src_path = Path(entry.path)
    dst_path = compare_dir / src_path.relative_to(src_root)

    if not dst_path.exists():
        logger.warning('Source file %s not found in compare %s', src_path, dst_path)
        compare_result.src_file_new_count += 1
        return

    dst_size = dst_path.stat().st_size
    if src_size != dst_size:
        logger.warning(
            'Source file %s size (%i Bytes) differs from compare file %s size (%i Bytes)',
            src_path,
            src_size,
            dst_path,
            dst_size,
        )
        compare_result.file_size_missmatch += 1
        return

    # Both files are hashed, so a damaged backup file is detected
    # independently of the database content:
    src_hash = hash_file(src_path)
    dst_hash = hash_file(dst_path)

    if src_hash != dst_hash:
        logger.warning(
            'Source file %s hash %r differs from compare file %s hash %r',
            src_path,
            src_hash,
            dst_path,
            dst_hash,
        )
        compare_result.file_hash_missmatch += 1
        return

    if src_size < size_db.MIN_SIZE:
        # Small file -> Not in deduplication database
        compare_result.small_file_count += 1
    else:
        if src_size not in size_db:
            logger.warning(
                'Source file %s size (%i Bytes) not found in deduplication database',
                src_path,
                src_size,
            )
            compare_result.size_db_missing_count += 1

        if src_hash not in hash_db:
            logger.warning(
                'Source file %s hash %r not found in deduplication database',
                src_path,
                src_hash,
            )
            compare_result.hash_db_missing_count += 1

    # Source and backup file are identical (missing database entries are only reported)
    compare_result.successful_file_count += 1
|
|
118
|
+
|
|
119
|
+
|
|
120
|
+
def compare_tree(
    *,
    src_root: Path,
    backup_root: Path,
    excludes: tuple[str, ...],
    log_manager: LoggingManager,
) -> CompareResult:
    """
    Compare all files of a source tree with the last backup of this tree.

    The last backup is the sub directory of ``<backup_root>/<source name>/``
    whose name starts with "20" and sorts last. A log file and a summary file,
    both prefixed with the current timestamp, are created next to the backups.

    Exits the process with status 1 if the source directory does not exist,
    ``backup_root`` has no ``.phlb`` directory, or no backup of the source
    tree can be found.

    :param src_root: Source directory to compare.
    :param backup_root: Backup destination that contains the ``.phlb`` directory.
    :param excludes: Names passed as ``excludes`` to the file system scan.
    :param log_manager: Used to start the file logging for this run.
    :return: The collected counters.
    """
    src_root = src_root.resolve()
    if not src_root.is_dir():
        print('Error: Source directory does not exist!')
        print(f'Please check source directory: "{src_root}"\n')
        sys.exit(1)

    backup_root = backup_root.resolve()
    phlb_conf_dir = backup_root / '.phlb'
    if not phlb_conf_dir.is_dir():
        print('Error: Compare directory seems to be wrong! (No .phlb configuration directory found)')
        print(f'Please check backup directory: "{backup_root}"\n')
        sys.exit(1)

    compare_main_dir = backup_root / src_root.name
    if not compare_main_dir.is_dir():
        # Without this check iterdir() below would raise FileNotFoundError
        print('Error: No backup directory found for the source directory!')
        print(f'Please check backup directory: "{compare_main_dir}"\n')
        sys.exit(1)

    timestamps = sorted(
        path.name for path in compare_main_dir.iterdir() if path.is_dir() and path.name.startswith('20')
    )
    print(f'Found {len(timestamps)} compare(s) in {compare_main_dir}:')
    for timestamp in timestamps:
        print(f' * {timestamp}')

    if not timestamps:
        # Without this check timestamps[-1] below would raise IndexError
        print('Error: No backup found to compare with!')
        print(f'Please check backup directory: "{compare_main_dir}"\n')
        sys.exit(1)

    last_timestamp = timestamps[-1]
    compare_dir = compare_main_dir / last_timestamp
    print(f'\nComparing source tree {src_root} with {last_timestamp} compare:')
    print(f'  {compare_dir}\n')

    now_timestamp = datetime.datetime.now().strftime('%Y-%m-%d-%H%M%S')
    log_file = compare_main_dir / f'{now_timestamp}-compare.log'
    log_manager.start_file_logging(log_file)

    exclude_names = set(excludes)
    with PrintTimingContextManager('Filesystem scan completed in'):
        src_file_count, src_total_size = humanized_fs_scan(src_root, excludes=exclude_names)

    with DisplayFileTreeProgress(src_file_count, src_total_size) as progress:
        # init "databases":
        size_db = FileSizeDatabase(phlb_conf_dir)
        hash_db = FileHashDatabase(backup_root, phlb_conf_dir)

        compare_result = CompareResult(compare_dir=compare_dir, log_file=log_file)

        next_update = 0
        for entry in iter_scandir_files(src_root, excludes=exclude_names):
            try:
                compare_one_file(
                    src_root=src_root,
                    entry=entry,
                    size_db=size_db,
                    hash_db=hash_db,
                    compare_dir=compare_dir,
                    compare_result=compare_result,
                )
            except Exception as err:
                # Log the error, but continue with the next file:
                logger.exception('Compare %s %s: %s', entry.path, err.__class__.__name__, err)
                compare_result.error_count += 1
            else:
                # Refresh the progress bars at most every 0.5 seconds:
                now = time.monotonic()
                if now >= next_update:
                    progress.update(
                        completed_file_count=compare_result.total_file_count,
                        completed_size=compare_result.total_size,
                    )
                    next_update = now + 0.5

        # Finalize progress indicator values:
        progress.update(completed_file_count=compare_result.total_file_count, completed_size=compare_result.total_size)

    summary_file = compare_main_dir / f'{now_timestamp}-summary.txt'
    with TeeStdoutContext(summary_file):
        print(f'\nCompare complete: {compare_dir} (total size {human_filesize(compare_result.total_size)})\n')
        print(f'  Total files processed: {compare_result.total_file_count}')
        print(f'   * Successful compared files: {compare_result.successful_file_count}')
        print(f'   * New source files: {compare_result.src_file_new_count}')
        print(f'   * File size missmatch: {compare_result.file_size_missmatch}')
        print(f'   * File hash missmatch: {compare_result.file_hash_missmatch}')

        print(f'   * Small (<{size_db.MIN_SIZE} Bytes) files: {compare_result.small_file_count}')
        print(f'   * Missing in size DB: {compare_result.size_db_missing_count}')
        print(f'   * Missing in hash DB: {compare_result.hash_db_missing_count}')

        if compare_result.error_count > 0:
            print(f'  Errors during compare: {compare_result.error_count} (see log for details)')
        print()

    logger.info('Compare completed. Summary created: %s', summary_file)

    return compare_result
|
|
@@ -0,0 +1,86 @@
|
|
|
1
|
+
import shutil
|
|
2
|
+
import tempfile
|
|
3
|
+
from pathlib import Path
|
|
4
|
+
from unittest import TestCase
|
|
5
|
+
|
|
6
|
+
from bx_py_utils.test_utils.redirect import RedirectOut
|
|
7
|
+
from cli_base.cli_tools.test_utils.base_testcases import OutputMustCapturedTestCaseMixin
|
|
8
|
+
|
|
9
|
+
from PyHardLinkBackup.compare_backup import CompareResult, LoggingManager, compare_tree
|
|
10
|
+
from PyHardLinkBackup.logging_setup import DEFAULT_LOG_FILE_LEVEL
|
|
11
|
+
from PyHardLinkBackup.utilities.file_hash_database import FileHashDatabase
|
|
12
|
+
from PyHardLinkBackup.utilities.file_size_database import FileSizeDatabase
|
|
13
|
+
from PyHardLinkBackup.utilities.filesystem import hash_file
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
class CompareBackupTestCase(OutputMustCapturedTestCaseMixin, TestCase):
    """Tests for compare_tree()."""

    def test_happy_path(self):
        """
        Source and backup contain the same three files,
        but only one of the two large files is known by the size and hash databases.
        """
        with tempfile.TemporaryDirectory() as src_dir, tempfile.TemporaryDirectory() as backup_dir:
            src_root = Path(src_dir).resolve()
            backup_root = Path(backup_dir).resolve()

            # Backup layout: <backup_root>/.phlb/ and <backup_root>/<source name>/<timestamp>/
            phlb_conf_dir = backup_root / '.phlb'
            phlb_conf_dir.mkdir()
            compare_dir = backup_root / src_root.name / '2026-01-17-120000'
            compare_dir.mkdir(parents=True)

            # One small file and two files that are large enough for the databases:
            min_size = FileSizeDatabase.MIN_SIZE
            (src_root / 'small_file.txt').write_text('hello world')
            (src_root / 'large_file_missing.txt').write_bytes(b'X' * min_size)
            large_file_in_dbs = src_root / 'large_file_in_dbs.txt'
            large_file_in_dbs.write_bytes(b'Y' * (min_size + 1))

            # The backup gets an identical copy of every source file:
            src_files = list(src_root.iterdir())
            for src_file in src_files:
                shutil.copy2(src_file, compare_dir / src_file.name)
            total_file_count = len(src_files)
            total_size = sum(src_file.stat().st_size for src_file in src_files)
            self.assertEqual(total_file_count, 3)
            self.assertEqual(total_size, 2012)

            # Only 'large_file_in_dbs.txt' is added to the databases:
            FileSizeDatabase(phlb_conf_dir).add(min_size + 1)
            hash_db = FileHashDatabase(backup_root, phlb_conf_dir)
            hash_db[hash_file(large_file_in_dbs)] = compare_dir / large_file_in_dbs.name

            with RedirectOut() as redirected_out:
                result = compare_tree(
                    src_root=src_root,
                    backup_root=backup_root,
                    excludes=(),
                    log_manager=LoggingManager(
                        console_level='info',
                        file_level=DEFAULT_LOG_FILE_LEVEL,
                    ),
                )
            self.assertEqual(redirected_out.stderr, '')
            self.assertIn('Compare completed.', redirected_out.stdout)

            # The log file name contains the current time, so it is taken from the result:
            expected = CompareResult(
                compare_dir=compare_dir,
                log_file=result.log_file,
                total_file_count=total_file_count,
                total_size=total_size,
                src_file_new_count=0,
                file_size_missmatch=0,
                file_hash_missmatch=0,
                small_file_count=1,
                size_db_missing_count=1,
                hash_db_missing_count=1,
                successful_file_count=total_file_count,
                error_count=0,
            )
            self.assertEqual(result, expected, redirected_out.stdout)
|
|
@@ -65,7 +65,7 @@ complete help for main CLI app:
|
|
|
65
65
|
|
|
66
66
|
[comment]: <> (✂✂✂ auto generated main help start ✂✂✂)
|
|
67
67
|
```
|
|
68
|
-
usage: phlb [-h] {backup,rebuild,version}
|
|
68
|
+
usage: phlb [-h] {backup,compare,rebuild,version}
|
|
69
69
|
|
|
70
70
|
|
|
71
71
|
|
|
@@ -75,6 +75,7 @@ usage: phlb [-h] {backup,rebuild,version}
|
|
|
75
75
|
╭─ subcommands ────────────────────────────────────────────────────────────────────────────────────────────────────────╮
|
|
76
76
|
│ (required) │
|
|
77
77
|
│ • backup Backup the source directory to the destination directory using hard links for deduplication. │
|
|
78
|
+
│ • compare Compares a source tree with the last backup and validates all known file hashes. │
|
|
78
79
|
│ • rebuild Rebuild the file hash and size database by scanning all backup files. And also verify SHA256SUMS and/or │
|
|
79
80
|
│ store missing hashes in SHA256SUMS files. │
|
|
80
81
|
│ • version Print version and exit │
|
|
@@ -216,6 +217,8 @@ Overview of main changes:
|
|
|
216
217
|
|
|
217
218
|
[comment]: <> (✂✂✂ auto generated history start ✂✂✂)
|
|
218
219
|
|
|
220
|
+
* [v1.5.0](https://github.com/jedie/PyHardLinkBackup/compare/v1.4.1...v1.5.0)
|
|
221
|
+
* 2026-01-17 - NEW: Compare command to verify source tree with last backup
|
|
219
222
|
* [v1.4.1](https://github.com/jedie/PyHardLinkBackup/compare/v1.4.0...v1.4.1)
|
|
220
223
|
* 2026-01-16 - Bugfix large file handling
|
|
221
224
|
* [v1.4.0](https://github.com/jedie/PyHardLinkBackup/compare/v1.3.0...v1.4.0)
|
|
@@ -228,15 +231,15 @@ Overview of main changes:
|
|
|
228
231
|
* 2026-01-15 - Add tests for rebuild
|
|
229
232
|
* 2026-01-15 - Add command to "rebuld" the size and hash filesystem database
|
|
230
233
|
* 2026-01-15 - Add screenshots in the README
|
|
234
|
+
|
|
235
|
+
<details><summary>Expand older history entries ...</summary>
|
|
236
|
+
|
|
231
237
|
* [v1.2.0](https://github.com/jedie/PyHardLinkBackup/compare/v1.1.0...v1.2.0)
|
|
232
238
|
* 2026-01-15 - Add error handling: Log exception but continue with the backup
|
|
233
239
|
* 2026-01-15 - Check permission and hadlink support on destination path
|
|
234
240
|
* 2026-01-14 - Enhance progress bars
|
|
235
241
|
* 2026-01-14 - A a note to rsync --link-dest
|
|
236
242
|
* 2026-01-14 - Use cli_base.cli_tools.test_utils.base_testcases
|
|
237
|
-
|
|
238
|
-
<details><summary>Expand older history entries ...</summary>
|
|
239
|
-
|
|
240
243
|
* [v1.1.0](https://github.com/jedie/PyHardLinkBackup/compare/v1.0.1...v1.1.0)
|
|
241
244
|
* 2026-01-14 - Change backup timestamp directory to old schema: '%Y-%m-%d-%H%M%S'
|
|
242
245
|
* 2026-01-14 - Add "Overview of main changes" to README
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{pyhardlinkbackup-1.4.1 → pyhardlinkbackup-1.5.0}/PyHardLinkBackup/cli_dev/shell_completion.py
RENAMED
|
File without changes
|
|
File without changes
|
{pyhardlinkbackup-1.4.1 → pyhardlinkbackup-1.5.0}/PyHardLinkBackup/cli_dev/update_readme_history.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{pyhardlinkbackup-1.4.1 → pyhardlinkbackup-1.5.0}/PyHardLinkBackup/tests/test_project_setup.py
RENAMED
|
File without changes
|
|
File without changes
|
{pyhardlinkbackup-1.4.1 → pyhardlinkbackup-1.5.0}/PyHardLinkBackup/tests/test_readme_history.py
RENAMED
|
File without changes
|
{pyhardlinkbackup-1.4.1 → pyhardlinkbackup-1.5.0}/PyHardLinkBackup/tests/test_rebuild_database.py
RENAMED
|
File without changes
|
|
File without changes
|
{pyhardlinkbackup-1.4.1 → pyhardlinkbackup-1.5.0}/PyHardLinkBackup/utilities/file_hash_database.py
RENAMED
|
File without changes
|
{pyhardlinkbackup-1.4.1 → pyhardlinkbackup-1.5.0}/PyHardLinkBackup/utilities/file_size_database.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{pyhardlinkbackup-1.4.1 → pyhardlinkbackup-1.5.0}/PyHardLinkBackup/utilities/tests/__init__.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{pyhardlinkbackup-1.4.1 → pyhardlinkbackup-1.5.0}/PyHardLinkBackup/utilities/tyro_cli_shared_args.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|