PyHardLinkBackup 1.4.0__tar.gz → 1.5.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (61) hide show
  1. {pyhardlinkbackup-1.4.0 → pyhardlinkbackup-1.5.0}/PKG-INFO +10 -5
  2. {pyhardlinkbackup-1.4.0 → pyhardlinkbackup-1.5.0}/PyHardLinkBackup/__init__.py +1 -1
  3. {pyhardlinkbackup-1.4.0 → pyhardlinkbackup-1.5.0}/PyHardLinkBackup/backup.py +2 -1
  4. {pyhardlinkbackup-1.4.0 → pyhardlinkbackup-1.5.0}/PyHardLinkBackup/cli_app/phlb.py +37 -1
  5. pyhardlinkbackup-1.5.0/PyHardLinkBackup/compare_backup.py +212 -0
  6. {pyhardlinkbackup-1.4.0 → pyhardlinkbackup-1.5.0}/PyHardLinkBackup/tests/test_backup.py +93 -61
  7. pyhardlinkbackup-1.5.0/PyHardLinkBackup/tests/test_compare_backup.py +86 -0
  8. {pyhardlinkbackup-1.4.0 → pyhardlinkbackup-1.5.0}/README.md +9 -4
  9. {pyhardlinkbackup-1.4.0 → pyhardlinkbackup-1.5.0}/.editorconfig +0 -0
  10. {pyhardlinkbackup-1.4.0 → pyhardlinkbackup-1.5.0}/.github/workflows/tests.yml +0 -0
  11. {pyhardlinkbackup-1.4.0 → pyhardlinkbackup-1.5.0}/.gitignore +0 -0
  12. {pyhardlinkbackup-1.4.0 → pyhardlinkbackup-1.5.0}/.idea/.gitignore +0 -0
  13. {pyhardlinkbackup-1.4.0 → pyhardlinkbackup-1.5.0}/.pre-commit-config.yaml +0 -0
  14. {pyhardlinkbackup-1.4.0 → pyhardlinkbackup-1.5.0}/.pre-commit-hooks.yaml +0 -0
  15. {pyhardlinkbackup-1.4.0 → pyhardlinkbackup-1.5.0}/.run/Template Python tests.run.xml +0 -0
  16. {pyhardlinkbackup-1.4.0 → pyhardlinkbackup-1.5.0}/.run/Unittests - __all__.run.xml +0 -0
  17. {pyhardlinkbackup-1.4.0 → pyhardlinkbackup-1.5.0}/.run/cli.py --help.run.xml +0 -0
  18. {pyhardlinkbackup-1.4.0 → pyhardlinkbackup-1.5.0}/.run/dev-cli update.run.xml +0 -0
  19. {pyhardlinkbackup-1.4.0 → pyhardlinkbackup-1.5.0}/.run/only DocTests.run.xml +0 -0
  20. {pyhardlinkbackup-1.4.0 → pyhardlinkbackup-1.5.0}/.run/only DocWrite.run.xml +0 -0
  21. {pyhardlinkbackup-1.4.0 → pyhardlinkbackup-1.5.0}/.venv-app/lib/python3.12/site-packages/cli_base/tests/shell_complete_snapshots/.gitignore +0 -0
  22. {pyhardlinkbackup-1.4.0 → pyhardlinkbackup-1.5.0}/PyHardLinkBackup/__main__.py +0 -0
  23. {pyhardlinkbackup-1.4.0 → pyhardlinkbackup-1.5.0}/PyHardLinkBackup/cli_app/__init__.py +0 -0
  24. {pyhardlinkbackup-1.4.0 → pyhardlinkbackup-1.5.0}/PyHardLinkBackup/cli_dev/__init__.py +0 -0
  25. {pyhardlinkbackup-1.4.0 → pyhardlinkbackup-1.5.0}/PyHardLinkBackup/cli_dev/benchmark.py +0 -0
  26. {pyhardlinkbackup-1.4.0 → pyhardlinkbackup-1.5.0}/PyHardLinkBackup/cli_dev/code_style.py +0 -0
  27. {pyhardlinkbackup-1.4.0 → pyhardlinkbackup-1.5.0}/PyHardLinkBackup/cli_dev/packaging.py +0 -0
  28. {pyhardlinkbackup-1.4.0 → pyhardlinkbackup-1.5.0}/PyHardLinkBackup/cli_dev/shell_completion.py +0 -0
  29. {pyhardlinkbackup-1.4.0 → pyhardlinkbackup-1.5.0}/PyHardLinkBackup/cli_dev/testing.py +0 -0
  30. {pyhardlinkbackup-1.4.0 → pyhardlinkbackup-1.5.0}/PyHardLinkBackup/cli_dev/update_readme_history.py +0 -0
  31. {pyhardlinkbackup-1.4.0 → pyhardlinkbackup-1.5.0}/PyHardLinkBackup/constants.py +0 -0
  32. {pyhardlinkbackup-1.4.0 → pyhardlinkbackup-1.5.0}/PyHardLinkBackup/logging_setup.py +0 -0
  33. {pyhardlinkbackup-1.4.0 → pyhardlinkbackup-1.5.0}/PyHardLinkBackup/rebuild_databases.py +0 -0
  34. {pyhardlinkbackup-1.4.0 → pyhardlinkbackup-1.5.0}/PyHardLinkBackup/tests/__init__.py +0 -0
  35. {pyhardlinkbackup-1.4.0 → pyhardlinkbackup-1.5.0}/PyHardLinkBackup/tests/test_doc_write.py +0 -0
  36. {pyhardlinkbackup-1.4.0 → pyhardlinkbackup-1.5.0}/PyHardLinkBackup/tests/test_doctests.py +0 -0
  37. {pyhardlinkbackup-1.4.0 → pyhardlinkbackup-1.5.0}/PyHardLinkBackup/tests/test_project_setup.py +0 -0
  38. {pyhardlinkbackup-1.4.0 → pyhardlinkbackup-1.5.0}/PyHardLinkBackup/tests/test_readme.py +0 -0
  39. {pyhardlinkbackup-1.4.0 → pyhardlinkbackup-1.5.0}/PyHardLinkBackup/tests/test_readme_history.py +0 -0
  40. {pyhardlinkbackup-1.4.0 → pyhardlinkbackup-1.5.0}/PyHardLinkBackup/tests/test_rebuild_database.py +0 -0
  41. {pyhardlinkbackup-1.4.0 → pyhardlinkbackup-1.5.0}/PyHardLinkBackup/utilities/__init__.py +0 -0
  42. {pyhardlinkbackup-1.4.0 → pyhardlinkbackup-1.5.0}/PyHardLinkBackup/utilities/file_hash_database.py +0 -0
  43. {pyhardlinkbackup-1.4.0 → pyhardlinkbackup-1.5.0}/PyHardLinkBackup/utilities/file_size_database.py +0 -0
  44. {pyhardlinkbackup-1.4.0 → pyhardlinkbackup-1.5.0}/PyHardLinkBackup/utilities/filesystem.py +0 -0
  45. {pyhardlinkbackup-1.4.0 → pyhardlinkbackup-1.5.0}/PyHardLinkBackup/utilities/humanize.py +0 -0
  46. {pyhardlinkbackup-1.4.0 → pyhardlinkbackup-1.5.0}/PyHardLinkBackup/utilities/rich_utils.py +0 -0
  47. {pyhardlinkbackup-1.4.0 → pyhardlinkbackup-1.5.0}/PyHardLinkBackup/utilities/sha256sums.py +0 -0
  48. {pyhardlinkbackup-1.4.0 → pyhardlinkbackup-1.5.0}/PyHardLinkBackup/utilities/tee.py +0 -0
  49. {pyhardlinkbackup-1.4.0 → pyhardlinkbackup-1.5.0}/PyHardLinkBackup/utilities/tests/__init__.py +0 -0
  50. {pyhardlinkbackup-1.4.0 → pyhardlinkbackup-1.5.0}/PyHardLinkBackup/utilities/tests/test_file_hash_database.py +0 -0
  51. {pyhardlinkbackup-1.4.0 → pyhardlinkbackup-1.5.0}/PyHardLinkBackup/utilities/tests/test_file_size_database.py +0 -0
  52. {pyhardlinkbackup-1.4.0 → pyhardlinkbackup-1.5.0}/PyHardLinkBackup/utilities/tests/test_filesystem.py +0 -0
  53. {pyhardlinkbackup-1.4.0 → pyhardlinkbackup-1.5.0}/PyHardLinkBackup/utilities/tyro_cli_shared_args.py +0 -0
  54. {pyhardlinkbackup-1.4.0 → pyhardlinkbackup-1.5.0}/cli.py +0 -0
  55. {pyhardlinkbackup-1.4.0 → pyhardlinkbackup-1.5.0}/dev-cli.py +0 -0
  56. {pyhardlinkbackup-1.4.0 → pyhardlinkbackup-1.5.0}/dist/.gitignore +0 -0
  57. {pyhardlinkbackup-1.4.0 → pyhardlinkbackup-1.5.0}/docs/README.md +0 -0
  58. {pyhardlinkbackup-1.4.0 → pyhardlinkbackup-1.5.0}/docs/about-docs.md +0 -0
  59. {pyhardlinkbackup-1.4.0 → pyhardlinkbackup-1.5.0}/noxfile.py +0 -0
  60. {pyhardlinkbackup-1.4.0 → pyhardlinkbackup-1.5.0}/pyproject.toml +0 -0
  61. {pyhardlinkbackup-1.4.0 → pyhardlinkbackup-1.5.0}/uv.lock +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: PyHardLinkBackup
3
- Version: 1.4.0
3
+ Version: 1.5.0
4
4
  Summary: HardLink/Deduplication Backups with Python
5
5
  Project-URL: Documentation, https://github.com/jedie/PyHardLinkBackup
6
6
  Project-URL: Source, https://github.com/jedie/PyHardLinkBackup
@@ -80,7 +80,7 @@ complete help for main CLI app:
80
80
 
81
81
  [comment]: <> (✂✂✂ auto generated main help start ✂✂✂)
82
82
  ```
83
- usage: phlb [-h] {backup,rebuild,version}
83
+ usage: phlb [-h] {backup,compare,rebuild,version}
84
84
 
85
85
 
86
86
 
@@ -90,6 +90,7 @@ usage: phlb [-h] {backup,rebuild,version}
90
90
  ╭─ subcommands ────────────────────────────────────────────────────────────────────────────────────────────────────────╮
91
91
  │ (required) │
92
92
  │ • backup Backup the source directory to the destination directory using hard links for deduplication. │
93
+ │ • compare Compares a source tree with the last backup and validates all known file hashes. │
93
94
  │ • rebuild Rebuild the file hash and size database by scanning all backup files. And also verify SHA256SUMS and/or │
94
95
  │ store missing hashes in SHA256SUMS files. │
95
96
  │ • version Print version and exit │
@@ -231,6 +232,10 @@ Overview of main changes:
231
232
 
232
233
  [comment]: <> (✂✂✂ auto generated history start ✂✂✂)
233
234
 
235
+ * [v1.5.0](https://github.com/jedie/PyHardLinkBackup/compare/v1.4.1...v1.5.0)
236
+ * 2026-01-17 - NEW: Compare command to verify source tree with last backup
237
+ * [v1.4.1](https://github.com/jedie/PyHardLinkBackup/compare/v1.4.0...v1.4.1)
238
+ * 2026-01-16 - Bugfix large file handling
234
239
  * [v1.4.0](https://github.com/jedie/PyHardLinkBackup/compare/v1.3.0...v1.4.0)
235
240
  * 2026-01-16 - Create log file in backup and a summary.txt
236
241
  * 2026-01-16 - Run CI tests on macos, too.
@@ -241,6 +246,9 @@ Overview of main changes:
241
246
  * 2026-01-15 - Add tests for rebuild
242
247
  * 2026-01-15 - Add command to "rebuld" the size and hash filesystem database
243
248
  * 2026-01-15 - Add screenshots in the README
249
+
250
+ <details><summary>Expand older history entries ...</summary>
251
+
244
252
  * [v1.2.0](https://github.com/jedie/PyHardLinkBackup/compare/v1.1.0...v1.2.0)
245
253
  * 2026-01-15 - Add error handling: Log exception but continue with the backup
246
254
  * 2026-01-15 - Check permission and hadlink support on destination path
@@ -250,9 +258,6 @@ Overview of main changes:
250
258
  * [v1.1.0](https://github.com/jedie/PyHardLinkBackup/compare/v1.0.1...v1.1.0)
251
259
  * 2026-01-14 - Change backup timestamp directory to old schema: '%Y-%m-%d-%H%M%S'
252
260
  * 2026-01-14 - Add "Overview of main changes" to README
253
-
254
- <details><summary>Expand older history entries ...</summary>
255
-
256
261
  * [v1.0.1](https://github.com/jedie/PyHardLinkBackup/compare/v1.0.0...v1.0.1)
257
262
  * 2026-01-13 - Store SHA256SUMS files in backup directories
258
263
  * [v1.0.0](https://github.com/jedie/PyHardLinkBackup/compare/v0.13.0...v1.0.0)
@@ -3,5 +3,5 @@
3
3
  """
4
4
 
5
5
  # See https://packaging.python.org/en/latest/specifications/version-specifiers/
6
- __version__ = '1.4.0'
6
+ __version__ = '1.5.0'
7
7
  __author__ = 'Jens Diemer <PyHardLinkBackup@jensdiemer.de>'
@@ -138,12 +138,13 @@ def backup_one_file(
138
138
  backup_result.hardlinked_size += size
139
139
  else:
140
140
  logger.info('Copy unique file: %s to %s', src_path, dst_path)
141
+ file_hash = copy_and_hash(src_path, dst_path)
141
142
  hash_db[file_hash] = dst_path
142
143
  backup_result.copied_files += 1
143
144
  backup_result.copied_size += size
144
145
 
145
146
  # Keep original file metadata (permission bits, time stamps, and flags)
146
- shutil.copy2(src_path, dst_path)
147
+ shutil.copystat(src_path, dst_path)
147
148
  else:
148
149
  # A file with this size not backuped before -> Can't be duplicate -> copy and hash
149
150
  file_hash = copy_and_hash(src_path, dst_path)
@@ -5,7 +5,7 @@ from typing import Annotated
5
5
  import tyro
6
6
  from rich import print # noqa
7
7
 
8
- from PyHardLinkBackup import rebuild_databases
8
+ from PyHardLinkBackup import compare_backup, rebuild_databases
9
9
  from PyHardLinkBackup.backup import backup_tree
10
10
  from PyHardLinkBackup.cli_app import app
11
11
  from PyHardLinkBackup.logging_setup import (
@@ -60,6 +60,42 @@ def backup(
60
60
  )
61
61
 
62
62
 
63
+ @app.command
64
+ def compare(
65
+ src: Annotated[
66
+ Path,
67
+ tyro.conf.arg(
68
+ metavar='source',
69
+ help='Source directory that should be compared with the last backup.',
70
+ ),
71
+ ],
72
+ dst: Annotated[
73
+ Path,
74
+ tyro.conf.arg(
75
+ metavar='destination',
76
+ help='Destination directory with the backups. Will pick the last backup for comparison.',
77
+ ),
78
+ ],
79
+ /,
80
+ excludes: TyroExcludeDirectoriesArgType = DEFAULT_EXCLUDE_DIRECTORIES,
81
+ verbosity: TyroConsoleLogLevelArgType = DEFAULT_CONSOLE_LOG_LEVEL,
82
+ log_file_level: TyroLogFileLevelArgType = DEFAULT_LOG_FILE_LEVEL,
83
+ ) -> None:
84
+ """
85
+ Compares a source tree with the last backup and validates all known file hashes.
86
+ """
87
+ log_manager = LoggingManager(
88
+ console_level=verbosity,
89
+ file_level=log_file_level,
90
+ )
91
+ compare_backup.compare_tree(
92
+ src_root=src,
93
+ backup_root=dst,
94
+ excludes=excludes,
95
+ log_manager=log_manager,
96
+ )
97
+
98
+
63
99
  @app.command
64
100
  def rebuild(
65
101
  backup_root: Annotated[
@@ -0,0 +1,212 @@
1
+ import dataclasses
2
+ import datetime
3
+ import logging
4
+ import os
5
+ import sys
6
+ import time
7
+ from pathlib import Path
8
+
9
+ from rich import print # noqa
10
+
11
+ from PyHardLinkBackup.logging_setup import LoggingManager
12
+ from PyHardLinkBackup.utilities.file_hash_database import FileHashDatabase
13
+ from PyHardLinkBackup.utilities.file_size_database import FileSizeDatabase
14
+ from PyHardLinkBackup.utilities.filesystem import (
15
+ hash_file,
16
+ humanized_fs_scan,
17
+ iter_scandir_files,
18
+ )
19
+ from PyHardLinkBackup.utilities.humanize import PrintTimingContextManager, human_filesize
20
+ from PyHardLinkBackup.utilities.rich_utils import DisplayFileTreeProgress
21
+ from PyHardLinkBackup.utilities.tee import TeeStdoutContext
22
+
23
+
24
+ logger = logging.getLogger(__name__)
25
+
26
+
27
+ @dataclasses.dataclass
28
+ class CompareResult:
29
+ compare_dir: Path
30
+ log_file: Path
31
+ #
32
+ total_file_count: int = 0
33
+ total_size: int = 0
34
+ #
35
+ src_file_new_count: int = 0
36
+ file_size_missmatch: int = 0
37
+ file_hash_missmatch: int = 0
38
+ #
39
+ small_file_count: int = 0
40
+ size_db_missing_count: int = 0
41
+ hash_db_missing_count: int = 0
42
+ #
43
+ successful_file_count: int = 0
44
+ error_count: int = 0
45
+
46
+
47
+ def compare_one_file(
48
+ *,
49
+ src_root: Path,
50
+ entry: os.DirEntry,
51
+ size_db: FileSizeDatabase,
52
+ hash_db: FileHashDatabase,
53
+ compare_dir: Path,
54
+ compare_result: CompareResult,
55
+ ) -> None:
56
+ src_size = entry.stat().st_size
57
+
58
+ # For the progress bars:
59
+ compare_result.total_file_count += 1
60
+ compare_result.total_size += src_size
61
+
62
+ src_path = Path(entry.path)
63
+ dst_path = compare_dir / src_path.relative_to(src_root)
64
+
65
+ if not dst_path.exists():
66
+ logger.warning('Source file %s not found in compare %s', src_path, dst_path)
67
+ compare_result.src_file_new_count += 1
68
+ return
69
+
70
+ dst_size = dst_path.stat().st_size
71
+ if src_size != dst_size:
72
+ logger.warning(
73
+ 'Source file %s size (%i Bytes) differs from compare file %s size (%iBytes)',
74
+ src_path,
75
+ src_size,
76
+ dst_path,
77
+ dst_size,
78
+ )
79
+ compare_result.file_size_missmatch += 1
80
+ return
81
+
82
+ src_hash = hash_file(src_path)
83
+ dst_hash = hash_file(dst_path)
84
+
85
+ if src_hash != dst_hash:
86
+ logger.warning(
87
+ 'Source file %s hash %r differs from compare file %s hash (%s)',
88
+ src_path,
89
+ src_hash,
90
+ dst_path,
91
+ dst_hash,
92
+ )
93
+ compare_result.file_hash_missmatch += 1
94
+ return
95
+
96
+ if src_size < size_db.MIN_SIZE:
97
+ # Small file -> Not in deduplication database
98
+ compare_result.small_file_count += 1
99
+ else:
100
+ if src_size not in size_db:
101
+ logger.warning(
102
+ 'Source file %s size (%i Bytes) not found in deduplication database',
103
+ src_path,
104
+ src_size,
105
+ )
106
+ compare_result.size_db_missing_count += 1
107
+
108
+ if src_hash not in hash_db:
109
+ logger.warning(
110
+ 'Source file %s hash %r not found in deduplication database',
111
+ src_path,
112
+ src_hash,
113
+ )
114
+ compare_result.hash_db_missing_count += 1
115
+
116
+ # Everything is ok
117
+ compare_result.successful_file_count += 1
118
+
119
+
120
+ def compare_tree(
121
+ *,
122
+ src_root: Path,
123
+ backup_root: Path,
124
+ excludes: tuple[str, ...],
125
+ log_manager: LoggingManager,
126
+ ) -> CompareResult:
127
+ src_root = src_root.resolve()
128
+ if not src_root.is_dir():
129
+ print('Error: Source directory does not exist!')
130
+ print(f'Please check source directory: "{src_root}"\n')
131
+ sys.exit(1)
132
+
133
+ backup_root = backup_root.resolve()
134
+ phlb_conf_dir = backup_root / '.phlb'
135
+ if not phlb_conf_dir.is_dir():
136
+ print('Error: Compare directory seems to be wrong! (No .phlb configuration directory found)')
137
+ print(f'Please check backup directory: "{backup_root}"\n')
138
+ sys.exit(1)
139
+
140
+ compare_main_dir = backup_root / src_root.name
141
+ timestamps = sorted(
142
+ path.name for path in compare_main_dir.iterdir() if path.is_dir() and path.name.startswith('20')
143
+ )
144
+ print(f'Found {len(timestamps)} compare(s) in {compare_main_dir}:')
145
+ for timestamp in timestamps:
146
+ print(f' * {timestamp}')
147
+ last_timestamp = timestamps[-1]
148
+ compare_dir = compare_main_dir / last_timestamp
149
+ print(f'\nComparing source tree {src_root} with {last_timestamp} compare:')
150
+ print(f' {compare_dir}\n')
151
+
152
+ now_timestamp = datetime.datetime.now().strftime('%Y-%m-%d-%H%M%S')
153
+ log_file = compare_main_dir / f'{now_timestamp}-compare.log'
154
+ log_manager.start_file_logging(log_file)
155
+
156
+ excludes: set = set(excludes)
157
+ with PrintTimingContextManager('Filesystem scan completed in'):
158
+ src_file_count, src_total_size = humanized_fs_scan(src_root, excludes=excludes)
159
+
160
+ with DisplayFileTreeProgress(src_file_count, src_total_size) as progress:
161
+ # init "databases":
162
+ size_db = FileSizeDatabase(phlb_conf_dir)
163
+ hash_db = FileHashDatabase(backup_root, phlb_conf_dir)
164
+
165
+ compare_result = CompareResult(compare_dir=compare_dir, log_file=log_file)
166
+
167
+ next_update = 0
168
+ for entry in iter_scandir_files(src_root, excludes=excludes):
169
+ try:
170
+ compare_one_file(
171
+ src_root=src_root,
172
+ entry=entry,
173
+ size_db=size_db,
174
+ hash_db=hash_db,
175
+ compare_dir=compare_dir,
176
+ compare_result=compare_result,
177
+ )
178
+ except Exception as err:
179
+ logger.exception(f'Compare {entry.path} {err.__class__.__name__}: {err}')
180
+ compare_result.error_count += 1
181
+ else:
182
+ now = time.monotonic()
183
+ if now >= next_update:
184
+ progress.update(
185
+ completed_file_count=compare_result.total_file_count,
186
+ completed_size=compare_result.total_size,
187
+ )
188
+ next_update = now + 0.5
189
+
190
+ # Finalize progress indicator values:
191
+ progress.update(completed_file_count=compare_result.total_file_count, completed_size=compare_result.total_size)
192
+
193
+ summary_file = compare_main_dir / f'{now_timestamp}-summary.txt'
194
+ with TeeStdoutContext(summary_file):
195
+ print(f'\nCompare complete: {compare_dir} (total size {human_filesize(compare_result.total_size)})\n')
196
+ print(f' Total files processed: {compare_result.total_file_count}')
197
+ print(f' * Successful compared files: {compare_result.successful_file_count}')
198
+ print(f' * New source files: {compare_result.src_file_new_count}')
199
+ print(f' * File size missmatch: {compare_result.file_size_missmatch}')
200
+ print(f' * File hash missmatch: {compare_result.file_hash_missmatch}')
201
+
202
+ print(f' * Small (<{size_db.MIN_SIZE} Bytes) files: {compare_result.small_file_count}')
203
+ print(f' * Missing in size DB: {compare_result.size_db_missing_count}')
204
+ print(f' * Missing in hash DB: {compare_result.hash_db_missing_count}')
205
+
206
+ if compare_result.error_count > 0:
207
+ print(f' Errors during compare: {compare_result.error_count} (see log for details)')
208
+ print()
209
+
210
+ logger.info('Compare completed. Summary created: %s', summary_file)
211
+
212
+ return compare_result
@@ -144,16 +144,13 @@ class BackupTreeTestCase(
144
144
  (sub_dir / 'file.txt').write_text('This is file in subdir')
145
145
 
146
146
  # Only files bigger than MIN_SIZE will be considered for hardlinking:
147
- size_db_min_file = src_root / 'min_sized_file1.bin'
148
- size_db_min_file.write_bytes(b'X' * FileSizeDatabase.MIN_SIZE)
147
+ (src_root / 'min_sized_file1.bin').write_bytes(b'X' * FileSizeDatabase.MIN_SIZE)
149
148
 
150
149
  # Same content and big enough to be considered for hardlinking:
151
- size_db_min_file = src_root / 'min_sized_file2.bin'
152
- size_db_min_file.write_bytes(b'X' * FileSizeDatabase.MIN_SIZE)
150
+ (src_root / 'min_sized_file2.bin').write_bytes(b'X' * FileSizeDatabase.MIN_SIZE)
153
151
 
154
152
  # Larger then CHUNK_SIZE file will be handled differently:
155
- large_file = src_root / 'large_file.bin'
156
- large_file.write_bytes(b'Y' * (CHUNK_SIZE + 1))
153
+ (src_root / 'large_file1.bin').write_bytes(b'Y' * (CHUNK_SIZE + 1))
157
154
 
158
155
  excluded_dir = src_root / '.cache'
159
156
  excluded_dir.mkdir()
@@ -176,7 +173,7 @@ class BackupTreeTestCase(
176
173
  backup_root=backup_root,
177
174
  excludes=('.cache',),
178
175
  log_manager=LoggingManager(
179
- console_level=DEFAULT_CONSOLE_LOG_LEVEL,
176
+ console_level='info',
180
177
  file_level=DEFAULT_LOG_FILE_LEVEL,
181
178
  ),
182
179
  )
@@ -208,6 +205,7 @@ class BackupTreeTestCase(
208
205
  copied_small_size=50,
209
206
  error_count=0,
210
207
  ),
208
+ redirected_out.stdout,
211
209
  )
212
210
 
213
211
  # The sources:
@@ -219,7 +217,7 @@ class BackupTreeTestCase(
219
217
  .cache/tempfile.tmp 12:00:00 file 1 38 41d7a2c9
220
218
  file2.txt 12:00:00 hardlink 2 14 8a11514a
221
219
  hardlink2file1 12:00:00 hardlink 2 14 8a11514a
222
- large_file.bin 12:00:00 file 1 67108865 9671eaac
220
+ large_file1.bin 12:00:00 file 1 67108865 9671eaac
223
221
  min_sized_file1.bin 12:00:00 file 1 1000 f0d93de4
224
222
  min_sized_file2.bin 12:00:00 file 1 1000 f0d93de4
225
223
  subdir/file.txt 12:00:00 file 1 22 c0167e63
@@ -234,10 +232,10 @@ class BackupTreeTestCase(
234
232
  root=backup_dir,
235
233
  expected_overview="""
236
234
  path birthtime type nlink size CRC32
237
- SHA256SUMS <mock> file 1 410 45c07cf7
235
+ SHA256SUMS <mock> file 1 411 b02da51e
238
236
  file2.txt 12:00:00 file 1 14 8a11514a
239
237
  hardlink2file1 12:00:00 file 1 14 8a11514a
240
- large_file.bin 12:00:00 file 1 67108865 9671eaac
238
+ large_file1.bin 12:00:00 file 1 67108865 9671eaac
241
239
  min_sized_file1.bin 12:00:00 hardlink 2 1000 f0d93de4
242
240
  min_sized_file2.bin 12:00:00 hardlink 2 1000 f0d93de4
243
241
  subdir/SHA256SUMS <mock> file 1 75 1af5ecc7
@@ -252,12 +250,31 @@ class BackupTreeTestCase(
252
250
  backup_root=backup_root,
253
251
  expected="""
254
252
  bb/c4/bbc4de2ca238d1… -> source/2026-01-01-123456/min_sized_file1.bin
255
- e6/37/e6374ac11d9049… -> source/2026-01-01-123456/large_file.bin
253
+ e6/37/e6374ac11d9049… -> source/2026-01-01-123456/large_file1.bin
256
254
  """,
257
255
  )
258
256
 
259
257
  #######################################################################################
260
- # Just backup again:
258
+ # Backup again with new added files:
259
+
260
+ # New small file with different size and different content:
261
+ (src_root / 'small_file_newA.txt').write_text('A new file')
262
+
263
+ # Add small file that size exists, but has different content:
264
+ (src_root / 'small_file_newB.txt').write_text('This is file 2')
265
+
266
+ # Bigger file with new size and new content:
267
+ (src_root / 'min_sized_file_newA.bin').write_bytes(b'A' * (FileSizeDatabase.MIN_SIZE + 1))
268
+
269
+ # Bigger file with existing size, but different content:
270
+ (src_root / 'min_sized_file_newB.bin').write_bytes(b'B' * FileSizeDatabase.MIN_SIZE)
271
+
272
+ # Add a larger then CHUNK_SIZE file with same existing size, but different content:
273
+ (src_root / 'large_file2.bin').write_bytes(b'Y' * (CHUNK_SIZE + 1))
274
+
275
+ # FIXME: freezegun doesn't handle this, see: https://github.com/spulec/freezegun/issues/392
276
+ # Set modification times to a fixed time for easier testing:
277
+ set_file_times(src_root, dt=parse_dt('2026-01-01T12:00:00+0000'))
261
278
 
262
279
  with (
263
280
  patch('PyHardLinkBackup.backup.iter_scandir_files', SortedIterScandirFiles),
@@ -269,7 +286,7 @@ class BackupTreeTestCase(
269
286
  backup_root=backup_root,
270
287
  excludes=('.cache',),
271
288
  log_manager=LoggingManager(
272
- console_level=DEFAULT_CONSOLE_LOG_LEVEL,
289
+ console_level='info',
273
290
  file_level=DEFAULT_LOG_FILE_LEVEL,
274
291
  ),
275
292
  )
@@ -280,23 +297,6 @@ class BackupTreeTestCase(
280
297
  str(Path(backup_dir).relative_to(temp_path)),
281
298
  'backup/source/2026-01-02-123456',
282
299
  )
283
- self.assertEqual(
284
- result,
285
- BackupResult(
286
- backup_dir=backup_dir,
287
- log_file=result.log_file,
288
- backup_count=7,
289
- backup_size=67110929,
290
- symlink_files=1,
291
- hardlinked_files=3, # <<< More hardlinks this time!
292
- hardlinked_size=67110865,
293
- copied_files=3,
294
- copied_size=50,
295
- copied_small_files=3,
296
- copied_small_size=50,
297
- error_count=0,
298
- ),
299
- )
300
300
  # The second backup:
301
301
  # * /.cache/ -> excluded
302
302
  # * min_sized_file1.bin and min_sized_file2.bin -> hardlinked
@@ -304,26 +304,51 @@ class BackupTreeTestCase(
304
304
  assert_fs_tree_overview(
305
305
  root=backup_dir,
306
306
  expected_overview="""
307
- path birthtime type nlink size CRC32
308
- SHA256SUMS <mock> file 1 410 45c07cf7
309
- file2.txt 12:00:00 file 1 14 8a11514a
310
- hardlink2file1 12:00:00 file 1 14 8a11514a
311
- large_file.bin 12:00:00 hardlink 2 67108865 9671eaac
312
- min_sized_file1.bin 12:00:00 hardlink 4 1000 f0d93de4
313
- min_sized_file2.bin 12:00:00 hardlink 4 1000 f0d93de4
314
- subdir/SHA256SUMS <mock> file 1 75 1af5ecc7
315
- subdir/file.txt 12:00:00 file 1 22 c0167e63
316
- symlink2file1 12:00:00 symlink 2 14 8a11514a
307
+ path birthtime type nlink size CRC32
308
+ SHA256SUMS <mock> file 1 845 6596856a
309
+ file2.txt 12:00:00 file 1 14 8a11514a
310
+ hardlink2file1 12:00:00 file 1 14 8a11514a
311
+ large_file1.bin 12:00:00 hardlink 3 67108865 9671eaac
312
+ large_file2.bin 12:00:00 hardlink 3 67108865 9671eaac
313
+ min_sized_file1.bin 12:00:00 hardlink 4 1000 f0d93de4
314
+ min_sized_file2.bin 12:00:00 hardlink 4 1000 f0d93de4
315
+ min_sized_file_newA.bin 12:00:00 file 1 1001 a48f0e33
316
+ min_sized_file_newB.bin 12:00:00 file 1 1000 7d9c564d
317
+ small_file_newA.txt 12:00:00 file 1 10 76d1acf1
318
+ small_file_newB.txt 12:00:00 file 1 14 131800f0
319
+ subdir/SHA256SUMS <mock> file 1 75 1af5ecc7
320
+ subdir/file.txt 12:00:00 file 1 22 c0167e63
321
+ symlink2file1 12:00:00 symlink 2 14 8a11514a
317
322
  """,
318
323
  )
324
+ self.assertEqual(
325
+ result,
326
+ BackupResult(
327
+ backup_dir=backup_dir,
328
+ log_file=result.log_file,
329
+ backup_count=12,
330
+ backup_size=134221819,
331
+ symlink_files=1,
332
+ hardlinked_files=4,
333
+ hardlinked_size=134219730,
334
+ copied_files=7,
335
+ copied_size=2075,
336
+ copied_small_files=5,
337
+ copied_small_size=74,
338
+ error_count=0,
339
+ ),
340
+ redirected_out.stdout,
341
+ )
319
342
 
320
343
  # The FileHashDatabase remains the same:
321
344
  with self.assertLogs('PyHardLinkBackup', level=logging.DEBUG):
322
345
  assert_hash_db_info(
323
346
  backup_root=backup_root,
324
347
  expected="""
348
+ 23/d2/23d2ce40d26211… -> source/2026-01-02-123456/min_sized_file_newA.bin
349
+ 9a/56/9a567077114134… -> source/2026-01-02-123456/min_sized_file_newB.bin
325
350
  bb/c4/bbc4de2ca238d1… -> source/2026-01-01-123456/min_sized_file1.bin
326
- e6/37/e6374ac11d9049… -> source/2026-01-01-123456/large_file.bin
351
+ e6/37/e6374ac11d9049… -> source/2026-01-01-123456/large_file1.bin
327
352
  """,
328
353
  )
329
354
 
@@ -365,16 +390,21 @@ class BackupTreeTestCase(
365
390
  assert_fs_tree_overview(
366
391
  root=backup_dir,
367
392
  expected_overview="""
368
- path birthtime type nlink size CRC32
369
- SHA256SUMS <mock> file 1 410 45c07cf7
370
- file2.txt 12:00:00 file 1 14 8a11514a
371
- hardlink2file1 12:00:00 file 1 14 8a11514a
372
- large_file.bin 12:00:00 hardlink 3 67108865 9671eaac
373
- min_sized_file1.bin 12:00:00 hardlink 2 1000 f0d93de4
374
- min_sized_file2.bin 12:00:00 hardlink 2 1000 f0d93de4
375
- subdir/SHA256SUMS <mock> file 1 75 1af5ecc7
376
- subdir/file.txt 12:00:00 file 1 22 c0167e63
377
- symlink2file1 12:00:00 symlink 2 14 8a11514a
393
+ path birthtime type nlink size CRC32
394
+ SHA256SUMS <mock> file 1 845 6596856a
395
+ file2.txt 12:00:00 file 1 14 8a11514a
396
+ hardlink2file1 12:00:00 file 1 14 8a11514a
397
+ large_file1.bin 12:00:00 hardlink 5 67108865 9671eaac
398
+ large_file2.bin 12:00:00 hardlink 5 67108865 9671eaac
399
+ min_sized_file1.bin 12:00:00 hardlink 2 1000 f0d93de4
400
+ min_sized_file2.bin 12:00:00 hardlink 2 1000 f0d93de4
401
+ min_sized_file_newA.bin 12:00:00 hardlink 2 1001 a48f0e33
402
+ min_sized_file_newB.bin 12:00:00 hardlink 2 1000 7d9c564d
403
+ small_file_newA.txt 12:00:00 file 1 10 76d1acf1
404
+ small_file_newB.txt 12:00:00 file 1 14 131800f0
405
+ subdir/SHA256SUMS <mock> file 1 75 1af5ecc7
406
+ subdir/file.txt 12:00:00 file 1 22 c0167e63
407
+ symlink2file1 12:00:00 symlink 2 14 8a11514a
378
408
  """,
379
409
  )
380
410
 
@@ -383,16 +413,16 @@ class BackupTreeTestCase(
383
413
  BackupResult(
384
414
  backup_dir=backup_dir,
385
415
  log_file=result.log_file,
386
- backup_count=7,
387
- backup_size=67110929,
416
+ backup_count=12,
417
+ backup_size=134221819,
388
418
  symlink_files=1,
389
- hardlinked_files=2, # <<< Less hardlinks this time, because of missing link source!
390
- hardlinked_size=67109865,
391
- copied_files=4,
392
- copied_size=1050,
393
- copied_small_files=3,
394
- copied_small_size=50,
395
- error_count=0,
419
+ hardlinked_files=5,
420
+ hardlinked_size=134220731,
421
+ copied_files=6,
422
+ copied_size=1074,
423
+ copied_small_files=5,
424
+ copied_small_size=74,
425
+ error_count=0
396
426
  ),
397
427
  )
398
428
 
@@ -402,8 +432,10 @@ class BackupTreeTestCase(
402
432
  assert_hash_db_info(
403
433
  backup_root=backup_root,
404
434
  expected="""
435
+ 23/d2/23d2ce40d26211… -> source/2026-01-02-123456/min_sized_file_newA.bin
436
+ 9a/56/9a567077114134… -> source/2026-01-02-123456/min_sized_file_newB.bin
405
437
  bb/c4/bbc4de2ca238d1… -> source/2026-01-03-123456/min_sized_file1.bin
406
- e6/37/e6374ac11d9049… -> source/2026-01-01-123456/large_file.bin
438
+ e6/37/e6374ac11d9049… -> source/2026-01-01-123456/large_file1.bin
407
439
  """,
408
440
  )
409
441
 
@@ -0,0 +1,86 @@
1
+ import shutil
2
+ import tempfile
3
+ from pathlib import Path
4
+ from unittest import TestCase
5
+
6
+ from bx_py_utils.test_utils.redirect import RedirectOut
7
+ from cli_base.cli_tools.test_utils.base_testcases import OutputMustCapturedTestCaseMixin
8
+
9
+ from PyHardLinkBackup.compare_backup import CompareResult, LoggingManager, compare_tree
10
+ from PyHardLinkBackup.logging_setup import DEFAULT_LOG_FILE_LEVEL
11
+ from PyHardLinkBackup.utilities.file_hash_database import FileHashDatabase
12
+ from PyHardLinkBackup.utilities.file_size_database import FileSizeDatabase
13
+ from PyHardLinkBackup.utilities.filesystem import hash_file
14
+
15
+
16
+ class CompareBackupTestCase(OutputMustCapturedTestCaseMixin, TestCase):
17
+ def test_happy_path(self):
18
+ with tempfile.TemporaryDirectory() as src_dir, tempfile.TemporaryDirectory() as backup_dir:
19
+ src_root = Path(src_dir).resolve()
20
+ backup_root = Path(backup_dir).resolve()
21
+
22
+ # Setup backup structure
23
+ phlb_conf_dir = backup_root / '.phlb'
24
+ phlb_conf_dir.mkdir()
25
+
26
+ compare_main_dir = backup_root / src_root.name
27
+ compare_main_dir.mkdir()
28
+
29
+ timestamp = '2026-01-17-120000'
30
+ compare_dir = compare_main_dir / timestamp
31
+ compare_dir.mkdir()
32
+
33
+ # Create source files
34
+ (src_root / 'small_file.txt').write_text('hello world')
35
+ (src_root / 'large_file_missing.txt').write_bytes(b'X' * FileSizeDatabase.MIN_SIZE)
36
+ large_file_in_dbs = src_root / 'large_file_in_dbs.txt'
37
+ large_file_in_dbs.write_bytes(b'Y' * (FileSizeDatabase.MIN_SIZE + 1))
38
+
39
+ # Copy files to backup
40
+ total_size = 0
41
+ total_file_count = 0
42
+ for file_path in src_root.iterdir():
43
+ shutil.copy2(file_path, compare_dir / file_path.name)
44
+ total_size += file_path.stat().st_size
45
+ total_file_count += 1
46
+ self.assertEqual(total_file_count, 3)
47
+ self.assertEqual(total_size, 2012)
48
+
49
+ # Create databases and add values from 'large_file_in_dbs.txt'
50
+ size_db = FileSizeDatabase(phlb_conf_dir)
51
+ size_db.add(FileSizeDatabase.MIN_SIZE + 1)
52
+ hash_db = FileHashDatabase(backup_root, phlb_conf_dir)
53
+ src_hash = hash_file(large_file_in_dbs)
54
+ hash_db[src_hash] = compare_dir / 'large_file_in_dbs.txt'
55
+
56
+ # Run compare_tree
57
+ with RedirectOut() as redirected_out:
58
+ result = compare_tree(
59
+ src_root=src_root,
60
+ backup_root=backup_root,
61
+ excludes=(),
62
+ log_manager=LoggingManager(
63
+ console_level='info',
64
+ file_level=DEFAULT_LOG_FILE_LEVEL,
65
+ ),
66
+ )
67
+ self.assertEqual(redirected_out.stderr, '')
68
+ self.assertIn('Compare completed.', redirected_out.stdout)
69
+ self.assertEqual(
70
+ result,
71
+ CompareResult(
72
+ compare_dir=compare_dir,
73
+ log_file=result.log_file,
74
+ total_file_count=total_file_count,
75
+ total_size=total_size,
76
+ src_file_new_count=0,
77
+ file_size_missmatch=0,
78
+ file_hash_missmatch=0,
79
+ small_file_count=1,
80
+ size_db_missing_count=1,
81
+ hash_db_missing_count=1,
82
+ successful_file_count=total_file_count,
83
+ error_count=0,
84
+ ),
85
+ redirected_out.stdout,
86
+ )
@@ -65,7 +65,7 @@ complete help for main CLI app:
65
65
 
66
66
  [comment]: <> (✂✂✂ auto generated main help start ✂✂✂)
67
67
  ```
68
- usage: phlb [-h] {backup,rebuild,version}
68
+ usage: phlb [-h] {backup,compare,rebuild,version}
69
69
 
70
70
 
71
71
 
@@ -75,6 +75,7 @@ usage: phlb [-h] {backup,rebuild,version}
75
75
  ╭─ subcommands ────────────────────────────────────────────────────────────────────────────────────────────────────────╮
76
76
  │ (required) │
77
77
  │ • backup Backup the source directory to the destination directory using hard links for deduplication. │
78
+ │ • compare Compares a source tree with the last backup and validates all known file hashes. │
78
79
  │ • rebuild Rebuild the file hash and size database by scanning all backup files. And also verify SHA256SUMS and/or │
79
80
  │ store missing hashes in SHA256SUMS files. │
80
81
  │ • version Print version and exit │
@@ -216,6 +217,10 @@ Overview of main changes:
216
217
 
217
218
  [comment]: <> (✂✂✂ auto generated history start ✂✂✂)
218
219
 
220
+ * [v1.5.0](https://github.com/jedie/PyHardLinkBackup/compare/v1.4.1...v1.5.0)
221
+ * 2026-01-17 - NEW: Compare command to verify source tree with last backup
222
+ * [v1.4.1](https://github.com/jedie/PyHardLinkBackup/compare/v1.4.0...v1.4.1)
223
+ * 2026-01-16 - Bugfix large file handling
219
224
  * [v1.4.0](https://github.com/jedie/PyHardLinkBackup/compare/v1.3.0...v1.4.0)
220
225
  * 2026-01-16 - Create log file in backup and a summary.txt
221
226
  * 2026-01-16 - Run CI tests on macos, too.
@@ -226,6 +231,9 @@ Overview of main changes:
226
231
  * 2026-01-15 - Add tests for rebuild
227
232
  * 2026-01-15 - Add command to "rebuld" the size and hash filesystem database
228
233
  * 2026-01-15 - Add screenshots in the README
234
+
235
+ <details><summary>Expand older history entries ...</summary>
236
+
229
237
  * [v1.2.0](https://github.com/jedie/PyHardLinkBackup/compare/v1.1.0...v1.2.0)
230
238
  * 2026-01-15 - Add error handling: Log exception but continue with the backup
231
239
  * 2026-01-15 - Check permission and hadlink support on destination path
@@ -235,9 +243,6 @@ Overview of main changes:
235
243
  * [v1.1.0](https://github.com/jedie/PyHardLinkBackup/compare/v1.0.1...v1.1.0)
236
244
  * 2026-01-14 - Change backup timestamp directory to old schema: '%Y-%m-%d-%H%M%S'
237
245
  * 2026-01-14 - Add "Overview of main changes" to README
238
-
239
- <details><summary>Expand older history entries ...</summary>
240
-
241
246
  * [v1.0.1](https://github.com/jedie/PyHardLinkBackup/compare/v1.0.0...v1.0.1)
242
247
  * 2026-01-13 - Store SHA256SUMS files in backup directories
243
248
  * [v1.0.0](https://github.com/jedie/PyHardLinkBackup/compare/v0.13.0...v1.0.0)