PyHardLinkBackup 1.2.0__tar.gz → 1.3.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (56) hide show
  1. {pyhardlinkbackup-1.2.0 → pyhardlinkbackup-1.3.0}/PKG-INFO +33 -13
  2. {pyhardlinkbackup-1.2.0 → pyhardlinkbackup-1.3.0}/PyHardLinkBackup/__init__.py +1 -1
  3. {pyhardlinkbackup-1.2.0 → pyhardlinkbackup-1.3.0}/PyHardLinkBackup/backup.py +7 -20
  4. {pyhardlinkbackup-1.2.0 → pyhardlinkbackup-1.3.0}/PyHardLinkBackup/cli_app/phlb.py +21 -0
  5. pyhardlinkbackup-1.3.0/PyHardLinkBackup/rebuild_databases.py +147 -0
  6. pyhardlinkbackup-1.3.0/PyHardLinkBackup/tests/test_rebuild_database.py +204 -0
  7. {pyhardlinkbackup-1.2.0 → pyhardlinkbackup-1.3.0}/PyHardLinkBackup/utilities/file_hash_database.py +4 -0
  8. {pyhardlinkbackup-1.2.0 → pyhardlinkbackup-1.3.0}/PyHardLinkBackup/utilities/rich_utils.py +8 -8
  9. pyhardlinkbackup-1.3.0/PyHardLinkBackup/utilities/sha256sums.py +61 -0
  10. {pyhardlinkbackup-1.2.0 → pyhardlinkbackup-1.3.0}/PyHardLinkBackup/utilities/tests/test_file_hash_database.py +2 -0
  11. {pyhardlinkbackup-1.2.0 → pyhardlinkbackup-1.3.0}/README.md +32 -12
  12. {pyhardlinkbackup-1.2.0 → pyhardlinkbackup-1.3.0}/.editorconfig +0 -0
  13. {pyhardlinkbackup-1.2.0 → pyhardlinkbackup-1.3.0}/.github/workflows/tests.yml +0 -0
  14. {pyhardlinkbackup-1.2.0 → pyhardlinkbackup-1.3.0}/.gitignore +0 -0
  15. {pyhardlinkbackup-1.2.0 → pyhardlinkbackup-1.3.0}/.idea/.gitignore +0 -0
  16. {pyhardlinkbackup-1.2.0 → pyhardlinkbackup-1.3.0}/.pre-commit-config.yaml +0 -0
  17. {pyhardlinkbackup-1.2.0 → pyhardlinkbackup-1.3.0}/.pre-commit-hooks.yaml +0 -0
  18. {pyhardlinkbackup-1.2.0 → pyhardlinkbackup-1.3.0}/.run/Template Python tests.run.xml +0 -0
  19. {pyhardlinkbackup-1.2.0 → pyhardlinkbackup-1.3.0}/.run/Unittests - __all__.run.xml +0 -0
  20. {pyhardlinkbackup-1.2.0 → pyhardlinkbackup-1.3.0}/.run/cli.py --help.run.xml +0 -0
  21. {pyhardlinkbackup-1.2.0 → pyhardlinkbackup-1.3.0}/.run/dev-cli update.run.xml +0 -0
  22. {pyhardlinkbackup-1.2.0 → pyhardlinkbackup-1.3.0}/.run/only DocTests.run.xml +0 -0
  23. {pyhardlinkbackup-1.2.0 → pyhardlinkbackup-1.3.0}/.run/only DocWrite.run.xml +0 -0
  24. {pyhardlinkbackup-1.2.0 → pyhardlinkbackup-1.3.0}/.venv-app/lib/python3.12/site-packages/cli_base/tests/shell_complete_snapshots/.gitignore +0 -0
  25. {pyhardlinkbackup-1.2.0 → pyhardlinkbackup-1.3.0}/PyHardLinkBackup/__main__.py +0 -0
  26. {pyhardlinkbackup-1.2.0 → pyhardlinkbackup-1.3.0}/PyHardLinkBackup/cli_app/__init__.py +0 -0
  27. {pyhardlinkbackup-1.2.0 → pyhardlinkbackup-1.3.0}/PyHardLinkBackup/cli_dev/__init__.py +0 -0
  28. {pyhardlinkbackup-1.2.0 → pyhardlinkbackup-1.3.0}/PyHardLinkBackup/cli_dev/benchmark.py +0 -0
  29. {pyhardlinkbackup-1.2.0 → pyhardlinkbackup-1.3.0}/PyHardLinkBackup/cli_dev/code_style.py +0 -0
  30. {pyhardlinkbackup-1.2.0 → pyhardlinkbackup-1.3.0}/PyHardLinkBackup/cli_dev/packaging.py +0 -0
  31. {pyhardlinkbackup-1.2.0 → pyhardlinkbackup-1.3.0}/PyHardLinkBackup/cli_dev/shell_completion.py +0 -0
  32. {pyhardlinkbackup-1.2.0 → pyhardlinkbackup-1.3.0}/PyHardLinkBackup/cli_dev/testing.py +0 -0
  33. {pyhardlinkbackup-1.2.0 → pyhardlinkbackup-1.3.0}/PyHardLinkBackup/cli_dev/update_readme_history.py +0 -0
  34. {pyhardlinkbackup-1.2.0 → pyhardlinkbackup-1.3.0}/PyHardLinkBackup/constants.py +0 -0
  35. {pyhardlinkbackup-1.2.0 → pyhardlinkbackup-1.3.0}/PyHardLinkBackup/tests/__init__.py +0 -0
  36. {pyhardlinkbackup-1.2.0 → pyhardlinkbackup-1.3.0}/PyHardLinkBackup/tests/test_backup.py +0 -0
  37. {pyhardlinkbackup-1.2.0 → pyhardlinkbackup-1.3.0}/PyHardLinkBackup/tests/test_doc_write.py +0 -0
  38. {pyhardlinkbackup-1.2.0 → pyhardlinkbackup-1.3.0}/PyHardLinkBackup/tests/test_doctests.py +0 -0
  39. {pyhardlinkbackup-1.2.0 → pyhardlinkbackup-1.3.0}/PyHardLinkBackup/tests/test_project_setup.py +0 -0
  40. {pyhardlinkbackup-1.2.0 → pyhardlinkbackup-1.3.0}/PyHardLinkBackup/tests/test_readme.py +0 -0
  41. {pyhardlinkbackup-1.2.0 → pyhardlinkbackup-1.3.0}/PyHardLinkBackup/tests/test_readme_history.py +0 -0
  42. {pyhardlinkbackup-1.2.0 → pyhardlinkbackup-1.3.0}/PyHardLinkBackup/utilities/__init__.py +0 -0
  43. {pyhardlinkbackup-1.2.0 → pyhardlinkbackup-1.3.0}/PyHardLinkBackup/utilities/file_size_database.py +0 -0
  44. {pyhardlinkbackup-1.2.0 → pyhardlinkbackup-1.3.0}/PyHardLinkBackup/utilities/filesystem.py +0 -0
  45. {pyhardlinkbackup-1.2.0 → pyhardlinkbackup-1.3.0}/PyHardLinkBackup/utilities/humanize.py +0 -0
  46. {pyhardlinkbackup-1.2.0 → pyhardlinkbackup-1.3.0}/PyHardLinkBackup/utilities/tests/__init__.py +0 -0
  47. {pyhardlinkbackup-1.2.0 → pyhardlinkbackup-1.3.0}/PyHardLinkBackup/utilities/tests/test_file_size_database.py +0 -0
  48. {pyhardlinkbackup-1.2.0 → pyhardlinkbackup-1.3.0}/PyHardLinkBackup/utilities/tests/test_filesystem.py +0 -0
  49. {pyhardlinkbackup-1.2.0 → pyhardlinkbackup-1.3.0}/cli.py +0 -0
  50. {pyhardlinkbackup-1.2.0 → pyhardlinkbackup-1.3.0}/dev-cli.py +0 -0
  51. {pyhardlinkbackup-1.2.0 → pyhardlinkbackup-1.3.0}/dist/.gitignore +0 -0
  52. {pyhardlinkbackup-1.2.0 → pyhardlinkbackup-1.3.0}/docs/README.md +0 -0
  53. {pyhardlinkbackup-1.2.0 → pyhardlinkbackup-1.3.0}/docs/about-docs.md +0 -0
  54. {pyhardlinkbackup-1.2.0 → pyhardlinkbackup-1.3.0}/noxfile.py +0 -0
  55. {pyhardlinkbackup-1.2.0 → pyhardlinkbackup-1.3.0}/pyproject.toml +0 -0
  56. {pyhardlinkbackup-1.2.0 → pyhardlinkbackup-1.3.0}/uv.lock +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: PyHardLinkBackup
3
- Version: 1.2.0
3
+ Version: 1.3.0
4
4
  Summary: HardLink/Deduplication Backups with Python
5
5
  Project-URL: Documentation, https://github.com/jedie/PyHardLinkBackup
6
6
  Project-URL: Source, https://github.com/jedie/PyHardLinkBackup
@@ -63,22 +63,36 @@ Backup the source directory to the destination directory using hard links for de
63
63
 
64
64
 
65
65
 
66
+ Running a backup looks like:
67
+
68
+ ![2026-01-15-phlb1.png](https://raw.githubusercontent.com/jedie/jedie.github.io/main/screenshots/PyHardLinkBackup/2026-01-15-phlb1.png "2026-01-15-phlb1.png")
69
+
70
+
71
+
72
+ When it has finished, it displays a summary:
73
+
74
+ ![2026-01-15-phlb2.png](https://raw.githubusercontent.com/jedie/jedie.github.io/main/screenshots/PyHardLinkBackup/2026-01-15-phlb2.png "2026-01-15-phlb2.png")
75
+
76
+
77
+
66
78
  complete help for main CLI app:
67
79
 
68
80
  [comment]: <> (✂✂✂ auto generated main help start ✂✂✂)
69
81
  ```
70
- usage: phlb [-h] {backup,version}
82
+ usage: phlb [-h] {backup,rebuild,version}
71
83
 
72
84
 
73
85
 
74
- ╭─ options ─────────────────────────────────────────────────────────────────────────────────────────────────╮
75
- │ -h, --help show this help message and exit
76
- ╰───────────────────────────────────────────────────────────────────────────────────────────────────────────╯
77
- ╭─ subcommands ─────────────────────────────────────────────────────────────────────────────────────────────╮
78
- │ (required)
79
- │ • backup Backup the source directory to the destination directory using hard links for deduplication.
80
- │ • version Print version and exit
81
- ╰───────────────────────────────────────────────────────────────────────────────────────────────────────────╯
86
+ ╭─ options ────────────────────────────────────────────────────────────────────────────────────────────────────────────╮
87
+ │ -h, --help show this help message and exit
88
+ ╰──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯
89
+ ╭─ subcommands ────────────────────────────────────────────────────────────────────────────────────────────────────────╮
90
+ │ (required)
91
+ │ • backup Backup the source directory to the destination directory using hard links for deduplication.
92
+ │ • rebuild Rebuild the file hash and size database by scanning all backup files. And also verify SHA256SUMS and/or
93
+ │ store missing hashes in SHA256SUMS files. │
94
+ │ • version Print version and exit │
95
+ ╰──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯
82
96
  ```
83
97
  [comment]: <> (✂✂✂ auto generated main help end ✂✂✂)
84
98
 
@@ -214,6 +228,12 @@ Overview of main changes:
214
228
 
215
229
  [comment]: <> (✂✂✂ auto generated history start ✂✂✂)
216
230
 
231
+ * [v1.3.0](https://github.com/jedie/PyHardLinkBackup/compare/v1.2.0...v1.3.0)
232
+ * 2026-01-15 - Verify SHA256SUMS files in "rebuild" command, too.
233
+ * 2026-01-15 - Code cleanup: use more generic names for and in BackupProgress
234
+ * 2026-01-15 - Add tests for rebuild
235
+ * 2026-01-15 - Add command to "rebuild" the size and hash filesystem database
236
+ * 2026-01-15 - Add screenshots in the README
217
237
  * [v1.2.0](https://github.com/jedie/PyHardLinkBackup/compare/v1.1.0...v1.2.0)
218
238
  * 2026-01-15 - Add error handling: Log exception but continue with the backup
219
239
  * 2026-01-15 - Check permission and hardlink support on destination path
@@ -225,6 +245,9 @@ Overview of main changes:
225
245
  * 2026-01-14 - Add "Overview of main changes" to README
226
246
  * [v1.0.1](https://github.com/jedie/PyHardLinkBackup/compare/v1.0.0...v1.0.1)
227
247
  * 2026-01-13 - Store SHA256SUMS files in backup directories
248
+
249
+ <details><summary>Expand older history entries ...</summary>
250
+
228
251
  * [v1.0.0](https://github.com/jedie/PyHardLinkBackup/compare/v0.13.0...v1.0.0)
229
252
  * 2026-01-13 - Change "./cli.py" to "phlb" (because it's the name installed via pipx)
230
253
  * 2026-01-13 - Update README
@@ -235,9 +258,6 @@ Overview of main changes:
235
258
  * 2026-01-13 - Add DocWrite, handle broken symlinks, keep file meta, handle missing hardlink sources
236
259
  * 2026-01-12 - First working iteration with rich progress bar
237
260
  * 2026-01-08 - Rewrite everything
238
-
239
- <details><summary>Expand older history entries ...</summary>
240
-
241
261
  * [v0.13.0](https://github.com/jedie/PyHardLinkBackup/compare/v0.12.3...v0.13.0)
242
262
  * 2020-03-18 - release v0.13.0
243
263
  * 2020-03-17 - deactivate pypy tests in travis, because of SQLite errors, like:
@@ -3,5 +3,5 @@
3
3
  """
4
4
 
5
5
  # See https://packaging.python.org/en/latest/specifications/version-specifiers/
6
- __version__ = '1.2.0'
6
+ __version__ = '1.3.0'
7
7
  __author__ = 'Jens Diemer <PyHardLinkBackup@jensdiemer.de>'
@@ -21,7 +21,8 @@ from PyHardLinkBackup.utilities.filesystem import (
21
21
  supports_hardlinks,
22
22
  )
23
23
  from PyHardLinkBackup.utilities.humanize import human_filesize
24
- from PyHardLinkBackup.utilities.rich_utils import BackupProgress
24
+ from PyHardLinkBackup.utilities.rich_utils import DisplayFileTreeProgress
25
+ from PyHardLinkBackup.utilities.sha256sums import store_hash
25
26
 
26
27
 
27
28
  logger = logging.getLogger(__name__)
@@ -151,22 +152,6 @@ def backup_one_file(
151
152
  store_hash(dst_path, file_hash)
152
153
 
153
154
 
154
- def store_hash(file_path: Path, file_hash: str):
155
- """DocWrite: README.md ## SHA256SUMS
156
- A `SHA256SUMS` file is stored in each backup directory containing the SHA256 hashes of all files in that directory.
157
- It's the same format as e.g.: `sha256sum * > SHA256SUMS` command produces.
158
- So it's possible to verify the integrity of the backup files later.
159
- e.g.:
160
- ```bash
161
- cd .../your/backup/foobar/20240101_120000/
162
- sha256sum -c SHA256SUMS
163
- ```
164
- """
165
- hash_file_path = file_path.parent / 'SHA256SUMS'
166
- with hash_file_path.open('a') as f:
167
- f.write(f'{file_hash} {file_path.name}\n')
168
-
169
-
170
155
  def backup_tree(*, src_root: Path, backup_root: Path, excludes: set[str]) -> BackupResult:
171
156
  src_root = src_root.resolve()
172
157
  if not src_root.is_dir():
@@ -202,7 +187,7 @@ def backup_tree(*, src_root: Path, backup_root: Path, excludes: set[str]) -> Bac
202
187
 
203
188
  print(f'\nBackup to {backup_dir}...\n')
204
189
 
205
- with BackupProgress(src_file_count, src_total_size) as progress:
190
+ with DisplayFileTreeProgress(src_file_count, src_total_size) as progress:
206
191
  # "Databases" for deduplication
207
192
  size_db = FileSizeDatabase(phlb_conf_dir)
208
193
  hash_db = FileHashDatabase(backup_root, phlb_conf_dir)
@@ -226,11 +211,13 @@ def backup_tree(*, src_root: Path, backup_root: Path, excludes: set[str]) -> Bac
226
211
  else:
227
212
  now = time.monotonic()
228
213
  if now >= next_update:
229
- progress.update(backup_count=backup_result.backup_count, backup_size=backup_result.backup_size)
214
+ progress.update(
215
+ completed_file_count=backup_result.backup_count, completed_size=backup_result.backup_size
216
+ )
230
217
  next_update = now + 0.5
231
218
 
232
219
  # Finalize progress indicator values:
233
- progress.update(backup_count=backup_result.backup_count, backup_size=backup_result.backup_size)
220
+ progress.update(completed_file_count=backup_result.backup_count, completed_size=backup_result.backup_size)
234
221
 
235
222
  print(f'\nBackup complete: {backup_dir} (total size {human_filesize(backup_result.backup_size)})\n')
236
223
  print(f' Total files processed: {backup_result.backup_count}')
@@ -7,6 +7,7 @@ from cli_base.cli_tools.verbosity import setup_logging
7
7
  from cli_base.tyro_commands import TyroVerbosityArgType
8
8
  from rich import print # noqa
9
9
 
10
+ from PyHardLinkBackup import rebuild_databases
10
11
  from PyHardLinkBackup.backup import backup_tree
11
12
  from PyHardLinkBackup.cli_app import app
12
13
 
@@ -48,3 +49,23 @@ def backup(
48
49
  backup_root=dst,
49
50
  excludes=set(excludes),
50
51
  )
52
+
53
+
54
+ @app.command
55
+ def rebuild(
56
+ backup_root: Annotated[
57
+ Path,
58
+ tyro.conf.arg(
59
+ metavar='backup-directory',
60
+ help='Root directory of the the backups.',
61
+ ),
62
+ ],
63
+ /,
64
+ verbosity: TyroVerbosityArgType = 2,
65
+ ) -> None:
66
+ """
67
+ Rebuild the file hash and size database by scanning all backup files. And also verify SHA256SUMS
68
+ and/or store missing hashes in SHA256SUMS files.
69
+ """
70
+ setup_logging(verbosity=verbosity)
71
+ rebuild_databases.rebuild(backup_root=backup_root)
@@ -0,0 +1,147 @@
1
+ import dataclasses
2
+ import logging
3
+ import os
4
+ import sys
5
+ import time
6
+ from pathlib import Path
7
+
8
+ from PyHardLinkBackup.utilities.file_hash_database import FileHashDatabase
9
+ from PyHardLinkBackup.utilities.file_size_database import FileSizeDatabase
10
+ from PyHardLinkBackup.utilities.filesystem import hash_file, humanized_fs_scan, iter_scandir_files
11
+ from PyHardLinkBackup.utilities.humanize import human_filesize
12
+ from PyHardLinkBackup.utilities.rich_utils import DisplayFileTreeProgress
13
+ from PyHardLinkBackup.utilities.sha256sums import check_sha256sums, store_hash
14
+
15
+
16
+ logger = logging.getLogger(__name__)
17
+
18
+
19
+ @dataclasses.dataclass
20
+ class RebuildResult:
21
+ process_count: int = 0
22
+ process_size: int = 0
23
+ #
24
+ added_size_count: int = 0
25
+ added_hash_count: int = 0
26
+ #
27
+ error_count: int = 0
28
+ #
29
+ hash_verified_count: int = 0
30
+ hash_mismatch_count: int = 0
31
+ hash_not_found_count: int = 0
32
+
33
+
34
+ def rebuild_one_file(
35
+ *,
36
+ entry: os.DirEntry,
37
+ size_db: FileSizeDatabase,
38
+ hash_db: FileHashDatabase,
39
+ rebuild_result: RebuildResult,
40
+ ):
41
+ rebuild_result.process_count += 1
42
+
43
+ if entry.name == 'SHA256SUMS':
44
+ # Skip existing SHA256SUMS files
45
+ return
46
+
47
+ size = entry.stat().st_size
48
+ rebuild_result.process_size += size
49
+
50
+ if size < size_db.MIN_SIZE:
51
+ # Small files will never deduplicate, skip them
52
+ return
53
+
54
+ file_path = Path(entry.path)
55
+ file_hash = hash_file(file_path)
56
+
57
+ if size not in size_db:
58
+ size_db.add(size)
59
+ rebuild_result.added_size_count += 1
60
+
61
+ if file_hash not in hash_db:
62
+ hash_db[file_hash] = file_path
63
+ rebuild_result.added_hash_count += 1
64
+
65
+ # We have calculated the current hash of the file,
66
+ # Let's check if we can verify it, too:
67
+ file_path = Path(entry.path)
68
+ compare_result = check_sha256sums(
69
+ file_path=file_path,
70
+ file_hash=file_hash,
71
+ )
72
+ if compare_result is True:
73
+ rebuild_result.hash_verified_count += 1
74
+ elif compare_result is False:
75
+ rebuild_result.hash_mismatch_count += 1
76
+ elif compare_result is None:
77
+ rebuild_result.hash_not_found_count += 1
78
+ store_hash(
79
+ file_path=file_path,
80
+ file_hash=file_hash,
81
+ )
82
+
83
+
84
+ def rebuild(backup_root: Path) -> RebuildResult:
85
+ backup_root = backup_root.resolve()
86
+ if not backup_root.is_dir():
87
+ print(f'Error: Backup directory "{backup_root}" does not exist!')
88
+ sys.exit(1)
89
+
90
+ phlb_conf_dir = backup_root / '.phlb'
91
+ if not phlb_conf_dir.is_dir():
92
+ print(
93
+ f'Error: Backup directory "{backup_root}" seems to be wrong:'
94
+ f' Our hidden ".phlb" configuration directory is missing!'
95
+ )
96
+ sys.exit(1)
97
+
98
+ file_count, total_size = humanized_fs_scan(backup_root, excludes={'.phlb'})
99
+
100
+ with DisplayFileTreeProgress(file_count, total_size) as progress:
101
+ # "Databases" for deduplication
102
+ size_db = FileSizeDatabase(phlb_conf_dir)
103
+ hash_db = FileHashDatabase(backup_root, phlb_conf_dir)
104
+
105
+ rebuild_result = RebuildResult()
106
+
107
+ next_update = 0
108
+ for entry in iter_scandir_files(backup_root, excludes={'.phlb'}):
109
+ try:
110
+ rebuild_one_file(
111
+ entry=entry,
112
+ size_db=size_db,
113
+ hash_db=hash_db,
114
+ rebuild_result=rebuild_result,
115
+ )
116
+ except Exception as err:
117
+ logger.exception(f'Backup {entry.path} {err.__class__.__name__}: {err}')
118
+ rebuild_result.error_count += 1
119
+ else:
120
+ now = time.monotonic()
121
+ if now >= next_update:
122
+ progress.update(
123
+ completed_file_count=rebuild_result.process_count, completed_size=rebuild_result.process_size
124
+ )
125
+ next_update = now + 0.5
126
+
127
+ # Finalize progress indicator values:
128
+ progress.update(completed_file_count=rebuild_result.process_count, completed_size=rebuild_result.process_size)
129
+
130
+ print(f'\nRebuild "{backup_root}" completed:')
131
+ print(f' Total files processed: {rebuild_result.process_count}')
132
+ print(f' Total size processed: {human_filesize(rebuild_result.process_size)}')
133
+
134
+ print(f' Added file size information entries: {rebuild_result.added_size_count}')
135
+ print(f' Added file hash entries: {rebuild_result.added_hash_count}')
136
+
137
+ if rebuild_result.error_count > 0:
138
+ print(f' Errors during rebuild: {rebuild_result.error_count} (see log for details)')
139
+
140
+ print('\nSHA256SUMS verification results:')
141
+ print(f' Successfully verified files: {rebuild_result.hash_verified_count}')
142
+ print(f' File hash mismatches: {rebuild_result.hash_mismatch_count}')
143
+ print(f' File hashes not found, newly stored: {rebuild_result.hash_not_found_count}')
144
+
145
+ print()
146
+
147
+ return rebuild_result
@@ -0,0 +1,204 @@
1
+ import logging
2
+ import tempfile
3
+ import textwrap
4
+ from pathlib import Path
5
+ from unittest.mock import patch
6
+
7
+ from bx_py_utils.test_utils.redirect import RedirectOut
8
+ from cli_base.cli_tools.test_utils.base_testcases import BaseTestCase
9
+
10
+ from PyHardLinkBackup import rebuild_databases
11
+ from PyHardLinkBackup.rebuild_databases import RebuildResult, rebuild, rebuild_one_file
12
+ from PyHardLinkBackup.utilities.file_size_database import FileSizeDatabase
13
+
14
+
15
+ def sorted_rglob_paths(path: Path):
16
+ return sorted([str(p.relative_to(path)) for p in path.rglob('*')])
17
+
18
+
19
+ def sorted_rglob_files(path: Path):
20
+ return sorted([str(p.relative_to(path)) for p in path.rglob('*') if p.is_file()])
21
+
22
+
23
+ class RebuildDatabaseTestCase(BaseTestCase):
24
+ def test_happy_path(self):
25
+ with tempfile.TemporaryDirectory() as temp_dir:
26
+ temp_path = Path(temp_dir)
27
+
28
+ backup_root = temp_path / 'backup'
29
+
30
+ with self.assertRaises(SystemExit), RedirectOut() as redirected_out:
31
+ rebuild(backup_root)
32
+
33
+ self.assertEqual(redirected_out.stderr, '')
34
+ self.assertEqual(redirected_out.stdout, f'Error: Backup directory "{backup_root}" does not exist!\n')
35
+
36
+ backup_root.mkdir()
37
+
38
+ with self.assertRaises(SystemExit), RedirectOut() as redirected_out:
39
+ rebuild(backup_root)
40
+
41
+ self.assertEqual(redirected_out.stderr, '')
42
+ self.assertIn('hidden ".phlb" configuration directory is missing', redirected_out.stdout)
43
+
44
+ phlb_conf_dir = backup_root / '.phlb'
45
+ phlb_conf_dir.mkdir()
46
+
47
+ #######################################################################################
48
+ # Run on empty backup directory:
49
+
50
+ self.assertEqual(sorted_rglob_paths(backup_root), ['.phlb'])
51
+
52
+ with self.assertLogs('PyHardLinkBackup', level=logging.DEBUG), RedirectOut() as redirected_out:
53
+ rebuild_result = rebuild(backup_root)
54
+ self.assertEqual(
55
+ rebuild_result,
56
+ RebuildResult(
57
+ process_count=0,
58
+ process_size=0,
59
+ added_size_count=0,
60
+ added_hash_count=0,
61
+ error_count=0,
62
+ ),
63
+ )
64
+ self.assertEqual(
65
+ sorted_rglob_paths(backup_root),
66
+ [
67
+ '.phlb',
68
+ '.phlb/hash-lookup',
69
+ '.phlb/size-lookup',
70
+ ],
71
+ )
72
+ self.assertEqual(redirected_out.stderr, '')
73
+
74
+ #######################################################################################
75
+ # Add one backuped file and run again:
76
+
77
+ snapshot_path = backup_root / 'source-name' / '2026-01-15-181709'
78
+ snapshot_path.mkdir(parents=True)
79
+
80
+ minimum_file_content = 'X' * FileSizeDatabase.MIN_SIZE
81
+ (snapshot_path / 'file1.txt').write_text(minimum_file_content)
82
+
83
+ with self.assertLogs('PyHardLinkBackup', level=logging.DEBUG), RedirectOut() as redirected_out:
84
+ rebuild_result = rebuild(backup_root)
85
+ self.assertEqual(
86
+ sorted_rglob_paths(backup_root),
87
+ [
88
+ '.phlb',
89
+ '.phlb/hash-lookup',
90
+ '.phlb/hash-lookup/bb',
91
+ '.phlb/hash-lookup/bb/c4',
92
+ '.phlb/hash-lookup/bb/c4/bbc4de2ca238d1ec41fb622b75b5cf7d31a6d2ac92405043dd8f8220364fefc8',
93
+ '.phlb/size-lookup',
94
+ '.phlb/size-lookup/10',
95
+ '.phlb/size-lookup/10/00',
96
+ '.phlb/size-lookup/10/00/1000',
97
+ 'source-name',
98
+ 'source-name/2026-01-15-181709',
99
+ 'source-name/2026-01-15-181709/SHA256SUMS',
100
+ 'source-name/2026-01-15-181709/file1.txt',
101
+ ],
102
+ )
103
+ self.assertEqual(
104
+ sorted_rglob_files(backup_root),
105
+ [
106
+ '.phlb/hash-lookup/bb/c4/bbc4de2ca238d1ec41fb622b75b5cf7d31a6d2ac92405043dd8f8220364fefc8',
107
+ '.phlb/size-lookup/10/00/1000',
108
+ 'source-name/2026-01-15-181709/SHA256SUMS',
109
+ 'source-name/2026-01-15-181709/file1.txt',
110
+ ],
111
+ )
112
+ self.assertEqual(redirected_out.stderr, '')
113
+ self.assertEqual(
114
+ rebuild_result,
115
+ RebuildResult(
116
+ process_count=1,
117
+ process_size=1000,
118
+ added_size_count=1,
119
+ added_hash_count=1,
120
+ error_count=0,
121
+ hash_verified_count=0,
122
+ hash_mismatch_count=0,
123
+ hash_not_found_count=1,
124
+ ),
125
+ )
126
+ self.assertEqual(
127
+ (snapshot_path / 'SHA256SUMS').read_text(),
128
+ 'bbc4de2ca238d1ec41fb622b75b5cf7d31a6d2ac92405043dd8f8220364fefc8 file1.txt\n',
129
+ )
130
+
131
+ #######################################################################################
132
+ # Add a file with same content and run again:
133
+
134
+ minimum_file_content = 'X' * FileSizeDatabase.MIN_SIZE
135
+ (snapshot_path / 'same_content.txt').write_text(minimum_file_content)
136
+
137
+ with self.assertLogs('PyHardLinkBackup', level=logging.DEBUG) as logs, RedirectOut() as redirected_out:
138
+ rebuild_result = rebuild(backup_root)
139
+ # No new hash of size entries, just the new file:
140
+ self.assertEqual(
141
+ sorted_rglob_files(backup_root),
142
+ [
143
+ '.phlb/hash-lookup/bb/c4/bbc4de2ca238d1ec41fb622b75b5cf7d31a6d2ac92405043dd8f8220364fefc8',
144
+ '.phlb/size-lookup/10/00/1000',
145
+ 'source-name/2026-01-15-181709/SHA256SUMS',
146
+ 'source-name/2026-01-15-181709/file1.txt',
147
+ 'source-name/2026-01-15-181709/same_content.txt',
148
+ ],
149
+ )
150
+ self.assertEqual(redirected_out.stderr, '')
151
+ self.assertEqual(
152
+ rebuild_result,
153
+ RebuildResult(
154
+ process_count=3,
155
+ process_size=2000,
156
+ added_size_count=0,
157
+ added_hash_count=0,
158
+ error_count=0,
159
+ hash_verified_count=1, # Existing file verified successfully
160
+ hash_mismatch_count=0,
161
+ hash_not_found_count=1, # One file added
162
+ ),
163
+ '\n'.join(logs.output) + redirected_out.stdout,
164
+ )
165
+ self.assertEqual(
166
+ (snapshot_path / 'SHA256SUMS').read_text(),
167
+ textwrap.dedent("""\
168
+ bbc4de2ca238d1ec41fb622b75b5cf7d31a6d2ac92405043dd8f8220364fefc8 file1.txt
169
+ bbc4de2ca238d1ec41fb622b75b5cf7d31a6d2ac92405043dd8f8220364fefc8 same_content.txt
170
+ """),
171
+ )
172
+
173
+ #######################################################################################
174
+ # Test error handling
175
+
176
+ def rebuild_one_file_mock(*, entry, **kwargs):
177
+ if entry.name == 'file1.txt':
178
+ raise IOError('Bam!')
179
+ return rebuild_one_file(entry=entry, **kwargs)
180
+
181
+ with (
182
+ self.assertLogs('PyHardLinkBackup', level=logging.ERROR) as logs,
183
+ RedirectOut() as redirected_out,
184
+ patch.object(rebuild_databases, 'rebuild_one_file', rebuild_one_file_mock),
185
+ ):
186
+ rebuild_result = rebuild(backup_root)
187
+ logs = ''.join(logs.output)
188
+ self.assertIn(f'Backup {snapshot_path}/file1.txt OSError: Bam!\n', logs)
189
+ self.assertIn('\nTraceback (most recent call last):\n', logs)
190
+ self.assertEqual(redirected_out.stderr, '')
191
+
192
+ self.assertEqual(
193
+ rebuild_result,
194
+ RebuildResult(
195
+ process_count=2,
196
+ process_size=1000,
197
+ added_size_count=0,
198
+ added_hash_count=0,
199
+ error_count=1, # <<< one file caused error
200
+ hash_verified_count=1,
201
+ hash_mismatch_count=0,
202
+ hash_not_found_count=0,
203
+ ),
204
+ )
@@ -32,6 +32,10 @@ class FileHashDatabase:
32
32
  hash_path = self.base_path / first_dir_name / second_dir_name / hash
33
33
  return hash_path
34
34
 
35
+ def __contains__(self, hash: str) -> bool:
36
+ hash_path = self._get_hash_path(hash)
37
+ return hash_path.exists()
38
+
35
39
  def get(self, hash: str) -> Path | None:
36
40
  hash_path = self._get_hash_path(hash)
37
41
  try:
@@ -32,8 +32,8 @@ class HumanFileSizeColumn(ProgressColumn):
32
32
  return Text(human_filesize(file_size))
33
33
 
34
34
 
35
- class BackupProgress:
36
- def __init__(self, src_file_count: int, src_total_size: int):
35
+ class DisplayFileTreeProgress:
36
+ def __init__(self, total_file_count: int, total_size: int):
37
37
  percentage_format = '[progress.percentage]{task.percentage:>3.1f}%'
38
38
  self.overall_progress = Progress(
39
39
  TaskProgressColumn(text_format=percentage_format),
@@ -50,7 +50,7 @@ class BackupProgress:
50
50
  BarColumn(bar_width=50),
51
51
  TextColumn('{task.completed}/{task.total} Files'),
52
52
  )
53
- self.file_count_progress_task_id = self.file_count_progress.add_task(description='', total=src_file_count)
53
+ self.file_count_progress_task_id = self.file_count_progress.add_task(description='', total=total_file_count)
54
54
  self.file_count_progress_task = self.file_count_progress.tasks[0]
55
55
 
56
56
  self.file_size_progress = Progress(
@@ -60,11 +60,11 @@ class BackupProgress:
60
60
  '|',
61
61
  TransferSpeedColumn(),
62
62
  )
63
- self.file_size_progress_task_id = self.file_size_progress.add_task(description='', total=src_total_size)
63
+ self.file_size_progress_task_id = self.file_size_progress.add_task(description='', total=total_size)
64
64
  self.file_size_progress_task = self.file_size_progress.tasks[0]
65
65
 
66
66
  progress_table = Table.grid()
67
- progress_table.add_row(Panel(self.overall_progress, title='[b]Overall Backup Progress', border_style='green'))
67
+ progress_table.add_row(Panel(self.overall_progress, title='[b]Overall Progress', border_style='green'))
68
68
  progress_table.add_row(Panel(self.file_count_progress, title='Total files saved'))
69
69
  progress_table.add_row(Panel(self.file_size_progress, title='Total file size processed'))
70
70
 
@@ -74,15 +74,15 @@ class BackupProgress:
74
74
  self.live.__enter__()
75
75
  return self
76
76
 
77
- def update(self, backup_count: int, backup_size: int):
77
+ def update(self, completed_file_count: int, completed_size: int):
78
78
  self.file_count_progress.update(
79
79
  task_id=self.file_count_progress_task_id,
80
- completed=backup_count,
80
+ completed=completed_file_count,
81
81
  refresh=True,
82
82
  )
83
83
  self.file_size_progress.update(
84
84
  task_id=self.file_size_progress_task_id,
85
- completed=backup_size,
85
+ completed=completed_size,
86
86
  refresh=True,
87
87
  )
88
88
  self.overall_progress.update(
@@ -0,0 +1,61 @@
1
+ import logging
2
+ from pathlib import Path
3
+
4
+
5
+ logger = logging.getLogger(__name__)
6
+
7
+
8
+ def get_sha256sums_path(file_path: Path):
9
+ """
10
+ >>> get_sha256sums_path(Path('foo/bar/baz.txt'))
11
+ PosixPath('foo/bar/SHA256SUMS')
12
+ """
13
+ hash_file_path = file_path.parent / 'SHA256SUMS'
14
+ return hash_file_path
15
+
16
+
17
+ def store_hash(file_path: Path, file_hash: str):
18
+ """DocWrite: README.md ## SHA256SUMS
19
+ A `SHA256SUMS` file is stored in each backup directory containing the SHA256 hashes of all files in that directory.
20
+ It's the same format as e.g.: `sha256sum * > SHA256SUMS` command produces.
21
+ So it's possible to verify the integrity of the backup files later.
22
+ e.g.:
23
+ ```bash
24
+ cd .../your/backup/foobar/20240101_120000/
25
+ sha256sum -c SHA256SUMS
26
+ ```
27
+ """
28
+ hash_file_path = get_sha256sums_path(file_path)
29
+ with hash_file_path.open('a') as f:
30
+ f.write(f'{file_hash} {file_path.name}\n')
31
+
32
+
33
+ def check_sha256sums(
34
+ *,
35
+ file_path: Path,
36
+ file_hash: str,
37
+ ) -> bool | None:
38
+ hash_file_path = get_sha256sums_path(file_path=file_path)
39
+ if not hash_file_path.is_file():
40
+ return None # Nothing to verify against
41
+
42
+ with hash_file_path.open('r') as f:
43
+ for line in f:
44
+ try:
45
+ expected_hash, filename = line.split(' ', maxsplit=1)
46
+ except ValueError:
47
+ logger.exception(f'Invalid line in "{hash_file_path}": {line!r}')
48
+ else:
49
+ filename = filename.strip()
50
+ if filename == file_path.name:
51
+ if not expected_hash == file_hash:
52
+ logger.error(
53
+ f'Hash {file_hash} from file {file_path} does not match hash in {hash_file_path} !'
54
+ )
55
+ return False
56
+ else:
57
+ logger.debug(f'{file_path} hash verified successfully from {hash_file_path}.')
58
+ return True
59
+
60
+ logger.info('No SHA256SUMS entry found for file: %s', file_path)
61
+ return None
@@ -72,8 +72,10 @@ class FileHashDatabaseTestCase(BaseTestCase):
72
72
  file_a_path.touch()
73
73
 
74
74
  self.assertIs(hash_db.get('12345678abcdef'), None)
75
+ self.assertIs('12345678abcdef' in hash_db, False)
75
76
  hash_db['12345678abcdef'] = file_a_path
76
77
  self.assertEqual(hash_db.get('12345678abcdef'), file_a_path)
78
+ self.assertIs('12345678abcdef' in hash_db, True)
77
79
  with self.assertLogs('PyHardLinkBackup', level=logging.DEBUG):
78
80
  self.assertEqual(
79
81
  get_hash_db_filenames(hash_db),
@@ -48,22 +48,36 @@ Backup the source directory to the destination directory using hard links for de
48
48
 
49
49
 
50
50
 
51
+ Running a backup looks like:
52
+
53
+ ![2026-01-15-phlb1.png](https://raw.githubusercontent.com/jedie/jedie.github.io/main/screenshots/PyHardLinkBackup/2026-01-15-phlb1.png "2026-01-15-phlb1.png")
54
+
55
+
56
+
57
+ When it's finished, it displays a summary:
58
+
59
+ ![2026-01-15-phlb2.png](https://raw.githubusercontent.com/jedie/jedie.github.io/main/screenshots/PyHardLinkBackup/2026-01-15-phlb2.png "2026-01-15-phlb2.png")
60
+
61
+
62
+
51
63
  complete help for main CLI app:
52
64
 
53
65
  [comment]: <> (✂✂✂ auto generated main help start ✂✂✂)
54
66
  ```
55
- usage: phlb [-h] {backup,version}
67
+ usage: phlb [-h] {backup,rebuild,version}
56
68
 
57
69
 
58
70
 
59
- ╭─ options ─────────────────────────────────────────────────────────────────────────────────────────────────╮
60
- │ -h, --help show this help message and exit
61
- ╰───────────────────────────────────────────────────────────────────────────────────────────────────────────╯
62
- ╭─ subcommands ─────────────────────────────────────────────────────────────────────────────────────────────╮
63
- │ (required)
64
- │ • backup Backup the source directory to the destination directory using hard links for deduplication.
65
- │ • version Print version and exit
66
- ╰───────────────────────────────────────────────────────────────────────────────────────────────────────────╯
71
+ ╭─ options ────────────────────────────────────────────────────────────────────────────────────────────────────────────╮
72
+ │ -h, --help show this help message and exit
73
+ ╰──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯
74
+ ╭─ subcommands ────────────────────────────────────────────────────────────────────────────────────────────────────────╮
75
+ │ (required)
76
+ │ • backup Backup the source directory to the destination directory using hard links for deduplication.
77
+ │ • rebuild Rebuild the file hash and size database by scanning all backup files. And also verify SHA256SUMS and/or
78
+ │ store missing hashes in SHA256SUMS files. │
79
+ │ • version Print version and exit │
80
+ ╰──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯
67
81
  ```
68
82
  [comment]: <> (✂✂✂ auto generated main help end ✂✂✂)
69
83
 
@@ -199,6 +213,12 @@ Overview of main changes:
199
213
 
200
214
  [comment]: <> (✂✂✂ auto generated history start ✂✂✂)
201
215
 
216
+ * [v1.3.0](https://github.com/jedie/PyHardLinkBackup/compare/v1.2.0...v1.3.0)
217
+ * 2026-01-15 - Verify SHA256SUMS files in "rebuild" command, too.
218
+ * 2026-01-15 - Code cleanup: use more generic names for and in BackupProgress
219
+ * 2026-01-15 - Add tests for rebuild
220
+ * 2026-01-15 - Add command to "rebuild" the size and hash filesystem database
221
+ * 2026-01-15 - Add screenshots in the README
202
222
  * [v1.2.0](https://github.com/jedie/PyHardLinkBackup/compare/v1.1.0...v1.2.0)
203
223
  * 2026-01-15 - Add error handling: Log exception but continue with the backup
204
224
  * 2026-01-15 - Check permission and hardlink support on destination path
@@ -210,6 +230,9 @@ Overview of main changes:
210
230
  * 2026-01-14 - Add "Overview of main changes" to README
211
231
  * [v1.0.1](https://github.com/jedie/PyHardLinkBackup/compare/v1.0.0...v1.0.1)
212
232
  * 2026-01-13 - Store SHA256SUMS files in backup directories
233
+
234
+ <details><summary>Expand older history entries ...</summary>
235
+
213
236
  * [v1.0.0](https://github.com/jedie/PyHardLinkBackup/compare/v0.13.0...v1.0.0)
214
237
  * 2026-01-13 - Change "./cli.py" to "phlb" (because it's the name installed via pipx)
215
238
  * 2026-01-13 - Update README
@@ -220,9 +243,6 @@ Overview of main changes:
220
243
  * 2026-01-13 - Add DocWrite, handle broken symlinks, keep file meta, handle missing hardlink sources
221
244
  * 2026-01-12 - First working iteration with rich progress bar
222
245
  * 2026-01-08 - Rewrite everything
223
-
224
- <details><summary>Expand older history entries ...</summary>
225
-
226
246
  * [v0.13.0](https://github.com/jedie/PyHardLinkBackup/compare/v0.12.3...v0.13.0)
227
247
  * 2020-03-18 - release v0.13.0
228
248
  * 2020-03-17 - deactivate pypy tests in travis, because of SQLite errors, like: