PyHardLinkBackup 1.1.0__tar.gz → 1.2.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (55) hide show
  1. {pyhardlinkbackup-1.1.0 → pyhardlinkbackup-1.2.0}/.pre-commit-config.yaml +1 -1
  2. {pyhardlinkbackup-1.1.0 → pyhardlinkbackup-1.2.0}/PKG-INFO +13 -5
  3. {pyhardlinkbackup-1.1.0 → pyhardlinkbackup-1.2.0}/PyHardLinkBackup/__init__.py +1 -1
  4. pyhardlinkbackup-1.2.0/PyHardLinkBackup/backup.py +252 -0
  5. {pyhardlinkbackup-1.1.0 → pyhardlinkbackup-1.2.0}/PyHardLinkBackup/tests/test_backup.py +68 -2
  6. {pyhardlinkbackup-1.1.0 → pyhardlinkbackup-1.2.0}/PyHardLinkBackup/tests/test_doc_write.py +2 -1
  7. {pyhardlinkbackup-1.1.0 → pyhardlinkbackup-1.2.0}/PyHardLinkBackup/tests/test_readme_history.py +2 -1
  8. {pyhardlinkbackup-1.1.0 → pyhardlinkbackup-1.2.0}/PyHardLinkBackup/utilities/filesystem.py +26 -1
  9. {pyhardlinkbackup-1.1.0 → pyhardlinkbackup-1.2.0}/PyHardLinkBackup/utilities/rich_utils.py +6 -5
  10. {pyhardlinkbackup-1.1.0 → pyhardlinkbackup-1.2.0}/PyHardLinkBackup/utilities/tests/test_file_hash_database.py +1 -1
  11. {pyhardlinkbackup-1.1.0 → pyhardlinkbackup-1.2.0}/PyHardLinkBackup/utilities/tests/test_file_size_database.py +1 -1
  12. {pyhardlinkbackup-1.1.0 → pyhardlinkbackup-1.2.0}/PyHardLinkBackup/utilities/tests/test_filesystem.py +34 -2
  13. {pyhardlinkbackup-1.1.0 → pyhardlinkbackup-1.2.0}/README.md +11 -3
  14. {pyhardlinkbackup-1.1.0 → pyhardlinkbackup-1.2.0}/pyproject.toml +1 -1
  15. {pyhardlinkbackup-1.1.0 → pyhardlinkbackup-1.2.0}/uv.lock +80 -80
  16. pyhardlinkbackup-1.1.0/PyHardLinkBackup/backup.py +0 -229
  17. pyhardlinkbackup-1.1.0/PyHardLinkBackup/utilities/tests/base_testcases.py +0 -88
  18. {pyhardlinkbackup-1.1.0 → pyhardlinkbackup-1.2.0}/.editorconfig +0 -0
  19. {pyhardlinkbackup-1.1.0 → pyhardlinkbackup-1.2.0}/.github/workflows/tests.yml +0 -0
  20. {pyhardlinkbackup-1.1.0 → pyhardlinkbackup-1.2.0}/.gitignore +0 -0
  21. {pyhardlinkbackup-1.1.0 → pyhardlinkbackup-1.2.0}/.idea/.gitignore +0 -0
  22. {pyhardlinkbackup-1.1.0 → pyhardlinkbackup-1.2.0}/.pre-commit-hooks.yaml +0 -0
  23. {pyhardlinkbackup-1.1.0 → pyhardlinkbackup-1.2.0}/.run/Template Python tests.run.xml +0 -0
  24. {pyhardlinkbackup-1.1.0 → pyhardlinkbackup-1.2.0}/.run/Unittests - __all__.run.xml +0 -0
  25. {pyhardlinkbackup-1.1.0 → pyhardlinkbackup-1.2.0}/.run/cli.py --help.run.xml +0 -0
  26. {pyhardlinkbackup-1.1.0 → pyhardlinkbackup-1.2.0}/.run/dev-cli update.run.xml +0 -0
  27. {pyhardlinkbackup-1.1.0 → pyhardlinkbackup-1.2.0}/.run/only DocTests.run.xml +0 -0
  28. {pyhardlinkbackup-1.1.0 → pyhardlinkbackup-1.2.0}/.run/only DocWrite.run.xml +0 -0
  29. {pyhardlinkbackup-1.1.0 → pyhardlinkbackup-1.2.0}/.venv-app/lib/python3.12/site-packages/cli_base/tests/shell_complete_snapshots/.gitignore +0 -0
  30. {pyhardlinkbackup-1.1.0 → pyhardlinkbackup-1.2.0}/PyHardLinkBackup/__main__.py +0 -0
  31. {pyhardlinkbackup-1.1.0 → pyhardlinkbackup-1.2.0}/PyHardLinkBackup/cli_app/__init__.py +0 -0
  32. {pyhardlinkbackup-1.1.0 → pyhardlinkbackup-1.2.0}/PyHardLinkBackup/cli_app/phlb.py +0 -0
  33. {pyhardlinkbackup-1.1.0 → pyhardlinkbackup-1.2.0}/PyHardLinkBackup/cli_dev/__init__.py +0 -0
  34. {pyhardlinkbackup-1.1.0 → pyhardlinkbackup-1.2.0}/PyHardLinkBackup/cli_dev/benchmark.py +0 -0
  35. {pyhardlinkbackup-1.1.0 → pyhardlinkbackup-1.2.0}/PyHardLinkBackup/cli_dev/code_style.py +0 -0
  36. {pyhardlinkbackup-1.1.0 → pyhardlinkbackup-1.2.0}/PyHardLinkBackup/cli_dev/packaging.py +0 -0
  37. {pyhardlinkbackup-1.1.0 → pyhardlinkbackup-1.2.0}/PyHardLinkBackup/cli_dev/shell_completion.py +0 -0
  38. {pyhardlinkbackup-1.1.0 → pyhardlinkbackup-1.2.0}/PyHardLinkBackup/cli_dev/testing.py +0 -0
  39. {pyhardlinkbackup-1.1.0 → pyhardlinkbackup-1.2.0}/PyHardLinkBackup/cli_dev/update_readme_history.py +0 -0
  40. {pyhardlinkbackup-1.1.0 → pyhardlinkbackup-1.2.0}/PyHardLinkBackup/constants.py +0 -0
  41. {pyhardlinkbackup-1.1.0 → pyhardlinkbackup-1.2.0}/PyHardLinkBackup/tests/__init__.py +0 -0
  42. {pyhardlinkbackup-1.1.0 → pyhardlinkbackup-1.2.0}/PyHardLinkBackup/tests/test_doctests.py +0 -0
  43. {pyhardlinkbackup-1.1.0 → pyhardlinkbackup-1.2.0}/PyHardLinkBackup/tests/test_project_setup.py +0 -0
  44. {pyhardlinkbackup-1.1.0 → pyhardlinkbackup-1.2.0}/PyHardLinkBackup/tests/test_readme.py +0 -0
  45. {pyhardlinkbackup-1.1.0 → pyhardlinkbackup-1.2.0}/PyHardLinkBackup/utilities/__init__.py +0 -0
  46. {pyhardlinkbackup-1.1.0 → pyhardlinkbackup-1.2.0}/PyHardLinkBackup/utilities/file_hash_database.py +0 -0
  47. {pyhardlinkbackup-1.1.0 → pyhardlinkbackup-1.2.0}/PyHardLinkBackup/utilities/file_size_database.py +0 -0
  48. {pyhardlinkbackup-1.1.0 → pyhardlinkbackup-1.2.0}/PyHardLinkBackup/utilities/humanize.py +0 -0
  49. {pyhardlinkbackup-1.1.0 → pyhardlinkbackup-1.2.0}/PyHardLinkBackup/utilities/tests/__init__.py +0 -0
  50. {pyhardlinkbackup-1.1.0 → pyhardlinkbackup-1.2.0}/cli.py +0 -0
  51. {pyhardlinkbackup-1.1.0 → pyhardlinkbackup-1.2.0}/dev-cli.py +0 -0
  52. {pyhardlinkbackup-1.1.0 → pyhardlinkbackup-1.2.0}/dist/.gitignore +0 -0
  53. {pyhardlinkbackup-1.1.0 → pyhardlinkbackup-1.2.0}/docs/README.md +0 -0
  54. {pyhardlinkbackup-1.1.0 → pyhardlinkbackup-1.2.0}/docs/about-docs.md +0 -0
  55. {pyhardlinkbackup-1.1.0 → pyhardlinkbackup-1.2.0}/noxfile.py +0 -0
@@ -2,6 +2,6 @@
2
2
  # See https://pre-commit.com for more information
3
3
  repos:
4
4
  - repo: https://github.com/jedie/cli-base-utilities
5
- rev: v0.26.0
5
+ rev: v0.27.0
6
6
  hooks:
7
7
  - id: update-readme-history
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: PyHardLinkBackup
3
- Version: 1.1.0
3
+ Version: 1.2.0
4
4
  Summary: HardLink/Deduplication Backups with Python
5
5
  Project-URL: Documentation, https://github.com/jedie/PyHardLinkBackup
6
6
  Project-URL: Source, https://github.com/jedie/PyHardLinkBackup
@@ -8,7 +8,7 @@ Author-email: Jens Diemer <PyHardLinkBackup@jensdiemer.de>
8
8
  License: GPL-3.0-or-later
9
9
  Requires-Python: >=3.12
10
10
  Requires-Dist: bx-py-utils
11
- Requires-Dist: cli-base-utilities
11
+ Requires-Dist: cli-base-utilities>=0.27.0
12
12
  Requires-Dist: rich
13
13
  Requires-Dist: tyro
14
14
  Description-Content-Type: text/markdown
@@ -25,6 +25,8 @@ HardLink/Deduplication Backups with Python
25
25
 
26
26
  **WIP:** v1.0.0 is a complete rewrite of PyHardLinkBackup.
27
27
 
28
+ It's similar to `rsync --link-dest` but the deduplication is done globally for all backups and all paths.
29
+
28
30
  ## installation
29
31
 
30
32
  You can use [pipx](https://pipx.pypa.io/stable/installation/) to install and use PyHardLinkBackup, e.g.:
@@ -212,6 +214,12 @@ Overview of main changes:
212
214
 
213
215
  [comment]: <> (✂✂✂ auto generated history start ✂✂✂)
214
216
 
217
+ * [v1.2.0](https://github.com/jedie/PyHardLinkBackup/compare/v1.1.0...v1.2.0)
218
+ * 2026-01-15 - Add error handling: Log exception but continue with the backup
219
+ * 2026-01-15 - Check permission and hadlink support on destination path
220
+ * 2026-01-14 - Enhance progress bars
221
+ * 2026-01-14 - Add a note about rsync --link-dest
222
+ * 2026-01-14 - Use cli_base.cli_tools.test_utils.base_testcases
215
223
  * [v1.1.0](https://github.com/jedie/PyHardLinkBackup/compare/v1.0.1...v1.1.0)
216
224
  * 2026-01-14 - Change backup timestamp directory to old schema: '%Y-%m-%d-%H%M%S'
217
225
  * 2026-01-14 - Add "Overview of main changes" to README
@@ -227,6 +235,9 @@ Overview of main changes:
227
235
  * 2026-01-13 - Add DocWrite, handle broken symlinks, keep file meta, handle missing hardlink sources
228
236
  * 2026-01-12 - First working iteration with rich progress bar
229
237
  * 2026-01-08 - Rewrite everything
238
+
239
+ <details><summary>Expand older history entries ...</summary>
240
+
230
241
  * [v0.13.0](https://github.com/jedie/PyHardLinkBackup/compare/v0.12.3...v0.13.0)
231
242
  * 2020-03-18 - release v0.13.0
232
243
  * 2020-03-17 - deactivate pypy tests in travis, because of SQLite errors, like:
@@ -245,9 +256,6 @@ Overview of main changes:
245
256
  * 2020-03-17 - dynamic chunk size
246
257
  * 2020-03-17 - ignore *.sha512 by default
247
258
  * 2020-03-17 - Update boot_pyhardlinkbackup.sh
248
-
249
- <details><summary>Expand older history entries ...</summary>
250
-
251
259
  * [v0.12.3](https://github.com/jedie/PyHardLinkBackup/compare/v0.12.2...v0.12.3)
252
260
  * 2020-03-17 - update README.rst
253
261
  * 2020-03-17 - don't publish if tests fail
@@ -3,5 +3,5 @@
3
3
  """
4
4
 
5
5
  # See https://packaging.python.org/en/latest/specifications/version-specifiers/
6
- __version__ = '1.1.0'
6
+ __version__ = '1.2.0'
7
7
  __author__ = 'Jens Diemer <PyHardLinkBackup@jensdiemer.de>'
@@ -0,0 +1,252 @@
1
+ import dataclasses
2
+ import logging
3
+ import os
4
+ import shutil
5
+ import sys
6
+ import time
7
+ from datetime import datetime
8
+ from pathlib import Path
9
+
10
+ from rich import print # noqa
11
+
12
+ from PyHardLinkBackup.constants import CHUNK_SIZE
13
+ from PyHardLinkBackup.utilities.file_hash_database import FileHashDatabase
14
+ from PyHardLinkBackup.utilities.file_size_database import FileSizeDatabase
15
+ from PyHardLinkBackup.utilities.filesystem import (
16
+ copy_and_hash,
17
+ hash_file,
18
+ humanized_fs_scan,
19
+ iter_scandir_files,
20
+ read_and_hash_file,
21
+ supports_hardlinks,
22
+ )
23
+ from PyHardLinkBackup.utilities.humanize import human_filesize
24
+ from PyHardLinkBackup.utilities.rich_utils import BackupProgress
25
+
26
+
27
+ logger = logging.getLogger(__name__)
28
+
29
+
30
+ @dataclasses.dataclass
31
+ class BackupResult:
32
+ backup_dir: Path
33
+ #
34
+ backup_count: int = 0
35
+ backup_size: int = 0
36
+ #
37
+ symlink_files: int = 0
38
+ hardlinked_files: int = 0
39
+ hardlinked_size: int = 0
40
+ #
41
+ copied_files: int = 0
42
+ copied_size: int = 0
43
+ #
44
+ copied_small_files: int = 0
45
+ copied_small_size: int = 0
46
+ #
47
+ error_count: int = 0
48
+
49
+
50
+ def backup_one_file(
51
+ *,
52
+ src_root: Path,
53
+ entry: os.DirEntry,
54
+ size_db: FileSizeDatabase,
55
+ hash_db: FileHashDatabase,
56
+ backup_dir: Path,
57
+ backup_result: BackupResult,
58
+ ) -> None:
59
+ backup_result.backup_count += 1
60
+ src_path = Path(entry.path)
61
+
62
+ dst_path = backup_dir / src_path.relative_to(src_root)
63
+ dst_dir_path = dst_path.parent
64
+ if not dst_dir_path.exists():
65
+ dst_dir_path.mkdir(parents=True, exist_ok=False)
66
+
67
+ try:
68
+ size = entry.stat().st_size
69
+ except FileNotFoundError:
70
+ # e.g.: Handle broken symlink
71
+ target = os.readlink(src_path)
72
+ dst_path.symlink_to(target)
73
+ backup_result.symlink_files += 1
74
+ return
75
+
76
+ backup_result.backup_size += size
77
+
78
+ if entry.name == 'SHA256SUMS':
79
+ # Skip existing SHA256SUMS files in source tree,
80
+ # because we create our own SHA256SUMS files.
81
+ logger.debug('Skip existing SHA256SUMS file: %s', src_path)
82
+ return
83
+
84
+ if entry.is_symlink():
85
+ logger.debug('Copy symlink: %s to %s', src_path, dst_path)
86
+ target = os.readlink(src_path)
87
+ dst_path.symlink_to(target)
88
+ backup_result.symlink_files += 1
89
+ return
90
+
91
+ # Process regular files
92
+ assert entry.is_file(follow_symlinks=False), f'Unexpected non-file: {src_path}'
93
+
94
+ # Deduplication logic
95
+
96
+ if size < size_db.MIN_SIZE:
97
+ # Small file -> always copy without deduplication
98
+ logger.info('Copy small file: %s to %s', src_path, dst_path)
99
+ file_hash = copy_and_hash(src_path, dst_path)
100
+ backup_result.copied_files += 1
101
+ backup_result.copied_size += size
102
+ backup_result.copied_small_files += 1
103
+ backup_result.copied_small_size += size
104
+ store_hash(dst_path, file_hash)
105
+ return
106
+
107
+ if size in size_db:
108
+ logger.debug('File with size %iBytes found before -> hash: %s', size, src_path)
109
+
110
+ if size <= CHUNK_SIZE:
111
+ # File can be read complete into memory
112
+ logger.debug('File size %iBytes <= CHUNK_SIZE (%iBytes) -> read complete into memory', size, CHUNK_SIZE)
113
+ file_content, file_hash = read_and_hash_file(src_path)
114
+ if existing_path := hash_db.get(file_hash):
115
+ logger.info('Hardlink duplicate file: %s to %s', dst_path, existing_path)
116
+ os.link(existing_path, dst_path)
117
+ backup_result.hardlinked_files += 1
118
+ backup_result.hardlinked_size += size
119
+ else:
120
+ logger.info('Store unique file: %s to %s', src_path, dst_path)
121
+ dst_path.write_bytes(file_content)
122
+ hash_db[file_hash] = dst_path
123
+ backup_result.copied_files += 1
124
+ backup_result.copied_size += size
125
+
126
+ else:
127
+ # Large file
128
+ file_hash = hash_file(src_path) # Calculate hash without copying
129
+
130
+ if existing_path := hash_db.get(file_hash):
131
+ logger.info('Hardlink duplicate file: %s to %s', dst_path, existing_path)
132
+ os.link(existing_path, dst_path)
133
+ backup_result.hardlinked_files += 1
134
+ backup_result.hardlinked_size += size
135
+ else:
136
+ logger.info('Copy unique file: %s to %s', src_path, dst_path)
137
+ hash_db[file_hash] = dst_path
138
+ backup_result.copied_files += 1
139
+ backup_result.copied_size += size
140
+
141
+ # Keep original file metadata (permission bits, time stamps, and flags)
142
+ shutil.copy2(src_path, dst_path)
143
+ else:
144
+ # A file with this size not backuped before -> Can't be duplicate -> copy and hash
145
+ file_hash = copy_and_hash(src_path, dst_path)
146
+ size_db.add(size)
147
+ hash_db[file_hash] = dst_path
148
+ backup_result.copied_files += 1
149
+ backup_result.copied_size += size
150
+
151
+ store_hash(dst_path, file_hash)
152
+
153
+
154
+ def store_hash(file_path: Path, file_hash: str):
155
+ """DocWrite: README.md ## SHA256SUMS
156
+ A `SHA256SUMS` file is stored in each backup directory containing the SHA256 hashes of all files in that directory.
157
+ It's the same format as e.g.: `sha256sum * > SHA256SUMS` command produces.
158
+ So it's possible to verify the integrity of the backup files later.
159
+ e.g.:
160
+ ```bash
161
+ cd .../your/backup/foobar/2024-01-01-120000/
162
+ sha256sum -c SHA256SUMS
163
+ ```
164
+ """
165
+ hash_file_path = file_path.parent / 'SHA256SUMS'
166
+ with hash_file_path.open('a') as f:
167
+ f.write(f'{file_hash} {file_path.name}\n')
168
+
169
+
170
+ def backup_tree(*, src_root: Path, backup_root: Path, excludes: set[str]) -> BackupResult:
171
+ src_root = src_root.resolve()
172
+ if not src_root.is_dir():
173
+ print('Error: Source directory does not exist!')
174
+ print(f'Please check source directory: "{src_root}"\n')
175
+ sys.exit(1)
176
+
177
+ backup_root = backup_root.resolve()
178
+ if not backup_root.is_dir():
179
+ print('Error: Backup directory does not exist!')
180
+ print(f'Please create "{backup_root}" directory first and start again!\n')
181
+ sys.exit(1)
182
+
183
+ if not os.access(backup_root, os.W_OK):
184
+ print('Error: No write access to backup directory!')
185
+ print(f'Please check permissions for backup directory: "{backup_root}"\n')
186
+ sys.exit(1)
187
+
188
+ if not supports_hardlinks(backup_root):
189
+ print('Error: Filesystem for backup directory does not support hardlinks!')
190
+ print(f'Please check backup directory: "{backup_root}"\n')
191
+ sys.exit(1)
192
+
193
+ # Step 1: Scan source directory:
194
+ src_file_count, src_total_size = humanized_fs_scan(src_root, excludes)
195
+
196
+ phlb_conf_dir = backup_root / '.phlb'
197
+ phlb_conf_dir.mkdir(parents=False, exist_ok=True)
198
+
199
+ backup_dir = backup_root / src_root.name / datetime.now().strftime('%Y-%m-%d-%H%M%S')
200
+ logger.info('Backup %s to %s', src_root, backup_dir)
201
+ backup_dir.mkdir(parents=True, exist_ok=False)
202
+
203
+ print(f'\nBackup to {backup_dir}...\n')
204
+
205
+ with BackupProgress(src_file_count, src_total_size) as progress:
206
+ # "Databases" for deduplication
207
+ size_db = FileSizeDatabase(phlb_conf_dir)
208
+ hash_db = FileHashDatabase(backup_root, phlb_conf_dir)
209
+
210
+ backup_result = BackupResult(backup_dir=backup_dir)
211
+
212
+ next_update = 0
213
+ for entry in iter_scandir_files(src_root, excludes=excludes):
214
+ try:
215
+ backup_one_file(
216
+ src_root=src_root,
217
+ entry=entry,
218
+ size_db=size_db,
219
+ hash_db=hash_db,
220
+ backup_dir=backup_dir,
221
+ backup_result=backup_result,
222
+ )
223
+ except Exception as err:
224
+ logger.exception(f'Backup {entry.path} {err.__class__.__name__}: {err}')
225
+ backup_result.error_count += 1
226
+ else:
227
+ now = time.monotonic()
228
+ if now >= next_update:
229
+ progress.update(backup_count=backup_result.backup_count, backup_size=backup_result.backup_size)
230
+ next_update = now + 0.5
231
+
232
+ # Finalize progress indicator values:
233
+ progress.update(backup_count=backup_result.backup_count, backup_size=backup_result.backup_size)
234
+
235
+ print(f'\nBackup complete: {backup_dir} (total size {human_filesize(backup_result.backup_size)})\n')
236
+ print(f' Total files processed: {backup_result.backup_count}')
237
+ print(f' * Symlinked files: {backup_result.symlink_files}')
238
+ print(
239
+ f' * Hardlinked files: {backup_result.hardlinked_files}'
240
+ f' (saved {human_filesize(backup_result.hardlinked_size)})'
241
+ )
242
+ print(f' * Copied files: {backup_result.copied_files} (total {human_filesize(backup_result.copied_size)})')
243
+ print(
244
+ f' of which small (<{size_db.MIN_SIZE} Bytes)'
245
+ f' files: {backup_result.copied_small_files}'
246
+ f' (total {human_filesize(backup_result.copied_small_size)})'
247
+ )
248
+ if backup_result.error_count > 0:
249
+ print(f' Errors during backup: {backup_result.error_count} (see log for details)')
250
+ print()
251
+
252
+ return backup_result
@@ -13,14 +13,14 @@ from bx_py_utils.test_utils.assertion import assert_text_equal
13
13
  from bx_py_utils.test_utils.datetime import parse_dt
14
14
  from bx_py_utils.test_utils.log_utils import NoLogs
15
15
  from bx_py_utils.test_utils.redirect import RedirectOut
16
+ from cli_base.cli_tools.test_utils.base_testcases import BaseTestCase
16
17
  from freezegun import freeze_time
17
18
  from tabulate import tabulate
18
19
 
19
20
  from PyHardLinkBackup.backup import BackupResult, backup_tree
20
21
  from PyHardLinkBackup.constants import CHUNK_SIZE
21
22
  from PyHardLinkBackup.utilities.file_size_database import FileSizeDatabase
22
- from PyHardLinkBackup.utilities.filesystem import iter_scandir_files
23
- from PyHardLinkBackup.utilities.tests.base_testcases import BaseTestCase
23
+ from PyHardLinkBackup.utilities.filesystem import copy_and_hash, iter_scandir_files
24
24
  from PyHardLinkBackup.utilities.tests.test_file_hash_database import assert_hash_db_info
25
25
 
26
26
 
@@ -184,6 +184,7 @@ class BackupTreeTestCase(BaseTestCase):
184
184
  copied_size=67109915,
185
185
  copied_small_files=3,
186
186
  copied_small_size=50,
187
+ error_count=0,
187
188
  ),
188
189
  )
189
190
 
@@ -267,6 +268,7 @@ class BackupTreeTestCase(BaseTestCase):
267
268
  copied_size=50,
268
269
  copied_small_files=3,
269
270
  copied_small_size=50,
271
+ error_count=0,
270
272
  ),
271
273
  )
272
274
  # The second backup:
@@ -360,6 +362,7 @@ class BackupTreeTestCase(BaseTestCase):
360
362
  copied_size=1050,
361
363
  copied_small_files=3,
362
364
  copied_small_size=50,
365
+ error_count=0,
363
366
  ),
364
367
  )
365
368
 
@@ -454,6 +457,7 @@ class BackupTreeTestCase(BaseTestCase):
454
457
  copied_size=31,
455
458
  copied_small_files=1,
456
459
  copied_small_size=31,
460
+ error_count=0,
457
461
  ),
458
462
  )
459
463
 
@@ -474,3 +478,65 @@ class BackupTreeTestCase(BaseTestCase):
474
478
  Symlinks are not stored in our FileHashDatabase, because they are not considered for hardlinking."""
475
479
  with self.assertLogs('PyHardLinkBackup', level=logging.DEBUG):
476
480
  assert_hash_db_info(backup_root=backup_root, expected='')
481
+
482
+ def test_error_handling(self):
483
+ with tempfile.TemporaryDirectory() as temp_dir:
484
+ temp_path = Path(temp_dir)
485
+
486
+ src_root = temp_path / 'source'
487
+ backup_root = temp_path / 'backup'
488
+
489
+ src_root.mkdir()
490
+ backup_root.mkdir()
491
+
492
+ (src_root / 'file1.txt').write_text('File 1')
493
+ (src_root / 'file2.txt').write_text('File 2')
494
+ (src_root / 'file3.txt').write_text('File 3')
495
+
496
+ # Set modification times to a fixed time for easier testing:
497
+ set_file_times(src_root, dt=parse_dt('2026-01-01T12:00:00+0000'))
498
+
499
+ def mocked_copy_and_hash(src: Path, dst: Path):
500
+ if src.name == 'file2.txt':
501
+ raise PermissionError('Bam!')
502
+ else:
503
+ return copy_and_hash(src, dst)
504
+
505
+ with (
506
+ self.assertLogs(level=logging.ERROR) as logs,
507
+ patch('PyHardLinkBackup.backup.iter_scandir_files', SortedIterScandirFiles),
508
+ patch('PyHardLinkBackup.backup.copy_and_hash', mocked_copy_and_hash),
509
+ freeze_time('2026-01-01T12:34:56Z', auto_tick_seconds=0),
510
+ RedirectOut() as redirected_out,
511
+ ):
512
+ result = backup_tree(
513
+ src_root=src_root,
514
+ backup_root=backup_root,
515
+ excludes={'.cache'},
516
+ )
517
+ self.assertEqual(redirected_out.stderr, '')
518
+ self.assertIn('Backup complete', redirected_out.stdout)
519
+ self.assertIn('Errors during backup:', redirected_out.stdout)
520
+
521
+ logs = ''.join(logs.output)
522
+ self.assertIn(
523
+ f'ERROR:PyHardLinkBackup.backup:Backup {src_root / "file2.txt"} PermissionError: Bam!\n',
524
+ logs,
525
+ )
526
+ self.assertIn('\nTraceback (most recent call last):\n', logs)
527
+ self.assertEqual(
528
+ result,
529
+ BackupResult(
530
+ backup_dir=result.backup_dir,
531
+ backup_count=3,
532
+ backup_size=18,
533
+ symlink_files=0,
534
+ hardlinked_files=0,
535
+ hardlinked_size=0,
536
+ copied_files=2,
537
+ copied_size=12,
538
+ copied_small_files=2,
539
+ copied_small_size=12,
540
+ error_count=1,
541
+ ),
542
+ )
@@ -19,7 +19,8 @@ class DocuWriteApiTestCase(TestCase):
19
19
  """
20
20
  assert_is_file(PACKAGE_ROOT / 'pyproject.toml')
21
21
 
22
- info: GeneratedInfo = generate(base_path=PACKAGE_ROOT)
22
+ with self.assertLogs():
23
+ info: GeneratedInfo = generate(base_path=PACKAGE_ROOT)
23
24
  self.assertGreaterEqual(len(info.paths), 1)
24
25
  self.assertEqual(info.update_count, 0, 'No files should be updated, commit the changes')
25
26
  self.assertEqual(info.remove_count, 0, 'No files should be removed, commit the changes')
@@ -5,4 +5,5 @@ from cli_base.cli_tools.git_history import update_readme_history
5
5
 
6
6
  class ReadmeHistoryTestCase(TestCase):
7
7
  def test_readme_history(self):
8
- update_readme_history(raise_update_error=True)
8
+ with self.assertLogs():
9
+ update_readme_history(raise_update_error=True)
@@ -6,6 +6,7 @@ import time
6
6
  from pathlib import Path
7
7
  from typing import Iterable
8
8
 
9
+ from bx_py_utils.path import assert_is_dir
9
10
  from rich.progress import (
10
11
  Progress,
11
12
  SpinnerColumn,
@@ -80,8 +81,10 @@ def humanized_fs_scan(path: Path, excludes: set[str]) -> tuple[int, int]:
80
81
  '{task.description}',
81
82
  SpinnerColumn('simpleDots'),
82
83
  TextColumn('[green]{task.fields[file_count]} Files'),
84
+ '|',
83
85
  HumanFileSizeColumn(field_name='total_size'),
84
- TextColumn('| [cyan]{task.fields[files_per_sec]} Files/sec'),
86
+ '|',
87
+ TextColumn('[cyan]{task.fields[files_per_sec]} Files/sec'),
85
88
  )
86
89
 
87
90
  file_count = 0
@@ -131,3 +134,25 @@ def humanized_fs_scan(path: Path, excludes: set[str]) -> tuple[int, int]:
131
134
  )
132
135
 
133
136
  return file_count, total_size
137
+
138
+
139
+ def supports_hardlinks(directory: Path) -> bool:
140
+ logger.debug('Checking hardlink support in %s', directory)
141
+ assert_is_dir(directory)
142
+ test_src_file = directory / '.phlb_test'
143
+ test_dst_file = directory / '.phlb_test_link'
144
+ hardlinks_supported = False
145
+ try:
146
+ test_src_file.write_text('test')
147
+ os.link(test_src_file, test_dst_file)
148
+ assert test_dst_file.read_text() == 'test'
149
+ hardlinks_supported = True
150
+ except OSError as err:
151
+ # e.g.: FAT/exFAT filesystems ;)
152
+ logger.exception('Hardlink test failed in %s: %s', directory, err)
153
+ finally:
154
+ test_src_file.unlink(missing_ok=True)
155
+ test_dst_file.unlink(missing_ok=True)
156
+
157
+ logger.info('Hardlink support in %s: %s', directory, hardlinks_supported)
158
+ return hardlinks_supported
@@ -29,13 +29,14 @@ class HumanFileSizeColumn(ProgressColumn):
29
29
  file_size = task.fields[self.field_name]
30
30
  except KeyError:
31
31
  raise KeyError(f'Field {self.field_name=} not found in: {task.fields.keys()=}') from None
32
- return Text(f'| {human_filesize(file_size)}')
32
+ return Text(human_filesize(file_size))
33
33
 
34
34
 
35
35
  class BackupProgress:
36
36
  def __init__(self, src_file_count: int, src_total_size: int):
37
+ percentage_format = '[progress.percentage]{task.percentage:>3.1f}%'
37
38
  self.overall_progress = Progress(
38
- TaskProgressColumn(),
39
+ TaskProgressColumn(text_format=percentage_format),
39
40
  BarColumn(bar_width=50),
40
41
  TextColumn('Elapsed:'),
41
42
  TimeElapsedColumn(),
@@ -45,15 +46,15 @@ class BackupProgress:
45
46
  self.overall_progress_task_id = self.overall_progress.add_task(description='', total=100)
46
47
 
47
48
  self.file_count_progress = Progress(
48
- TaskProgressColumn(),
49
+ TaskProgressColumn(text_format=percentage_format),
49
50
  BarColumn(bar_width=50),
50
- TextColumn('{task.completed} Files'),
51
+ TextColumn('{task.completed}/{task.total} Files'),
51
52
  )
52
53
  self.file_count_progress_task_id = self.file_count_progress.add_task(description='', total=src_file_count)
53
54
  self.file_count_progress_task = self.file_count_progress.tasks[0]
54
55
 
55
56
  self.file_size_progress = Progress(
56
- TaskProgressColumn(),
57
+ TaskProgressColumn(text_format=percentage_format),
57
58
  BarColumn(bar_width=50),
58
59
  HumanFileSizeColumn(),
59
60
  '|',
@@ -6,10 +6,10 @@ from pathlib import Path
6
6
  from bx_py_utils.path import assert_is_dir
7
7
  from bx_py_utils.test_utils.assertion import assert_text_equal
8
8
  from bx_py_utils.test_utils.log_utils import NoLogs
9
+ from cli_base.cli_tools.test_utils.base_testcases import BaseTestCase
9
10
 
10
11
  from PyHardLinkBackup.utilities.file_hash_database import FileHashDatabase, HashAlreadyExistsError
11
12
  from PyHardLinkBackup.utilities.filesystem import iter_scandir_files
12
- from PyHardLinkBackup.utilities.tests.base_testcases import BaseTestCase
13
13
 
14
14
 
15
15
  class TemporaryFileHashDatabase(tempfile.TemporaryDirectory):
@@ -4,10 +4,10 @@ from collections.abc import Iterable
4
4
  from pathlib import Path
5
5
 
6
6
  from bx_py_utils.test_utils.log_utils import NoLogs
7
+ from cli_base.cli_tools.test_utils.base_testcases import BaseTestCase
7
8
 
8
9
  from PyHardLinkBackup.utilities.file_size_database import FileSizeDatabase
9
10
  from PyHardLinkBackup.utilities.filesystem import iter_scandir_files
10
- from PyHardLinkBackup.utilities.tests.base_testcases import BaseTestCase
11
11
 
12
12
 
13
13
  class TemporaryFileSizeDatabase(tempfile.TemporaryDirectory):
@@ -1,14 +1,25 @@
1
1
  import hashlib
2
+ import logging
2
3
  import os
3
4
  import tempfile
4
5
  from pathlib import Path
6
+ from unittest.mock import patch
7
+
8
+ from cli_base.cli_tools.test_utils.base_testcases import BaseTestCase
5
9
 
6
10
  from PyHardLinkBackup.constants import HASH_ALGO
7
- from PyHardLinkBackup.utilities.filesystem import copy_and_hash, hash_file, iter_scandir_files, read_and_hash_file
8
- from PyHardLinkBackup.utilities.tests.base_testcases import BaseTestCase
11
+ from PyHardLinkBackup.utilities.filesystem import (
12
+ copy_and_hash,
13
+ hash_file,
14
+ iter_scandir_files,
15
+ read_and_hash_file,
16
+ supports_hardlinks,
17
+ )
9
18
 
10
19
 
11
20
  class TestHashFile(BaseTestCase):
21
+ maxDiff = None
22
+
12
23
  def test_hash_file(self):
13
24
  self.assertEqual(
14
25
  hashlib.new(HASH_ALGO, b'test content').hexdigest(),
@@ -92,3 +103,24 @@ class TestHashFile(BaseTestCase):
92
103
  logs = ''.join(logs.output)
93
104
  self.assertIn('Scanning directory ', logs)
94
105
  self.assertIn('Excluding directory ', logs)
106
+
107
+ def test_supports_hardlinks(self):
108
+ with tempfile.TemporaryDirectory() as temp:
109
+ with self.assertLogs(level=logging.INFO) as logs:
110
+ self.assertTrue(supports_hardlinks(Path(temp)))
111
+ self.assertEqual(
112
+ ''.join(logs.output),
113
+ f'INFO:PyHardLinkBackup.utilities.filesystem:Hardlink support in {temp}: True',
114
+ )
115
+
116
+ with (
117
+ self.assertLogs(level=logging.ERROR) as logs,
118
+ patch('PyHardLinkBackup.utilities.filesystem.os.link', side_effect=OSError),
119
+ ):
120
+ self.assertFalse(supports_hardlinks(Path(temp)))
121
+ logs = ''.join(logs.output)
122
+ self.assertIn(f'Hardlink test failed in {temp}:', logs)
123
+ self.assertIn('OSError', logs)
124
+
125
+ with self.assertLogs(level=logging.DEBUG), self.assertRaises(NotADirectoryError):
126
+ supports_hardlinks(Path('/not/existing/directory'))
@@ -10,6 +10,8 @@ HardLink/Deduplication Backups with Python
10
10
 
11
11
  **WIP:** v1.0.0 is a complete rewrite of PyHardLinkBackup.
12
12
 
13
+ It's similar to `rsync --link-dest` but the deduplication is done globally for all backups and all paths.
14
+
13
15
  ## installation
14
16
 
15
17
  You can use [pipx](https://pipx.pypa.io/stable/installation/) to install and use PyHardLinkBackup, e.g.:
@@ -197,6 +199,12 @@ Overview of main changes:
197
199
 
198
200
  [comment]: <> (✂✂✂ auto generated history start ✂✂✂)
199
201
 
202
+ * [v1.2.0](https://github.com/jedie/PyHardLinkBackup/compare/v1.1.0...v1.2.0)
203
+ * 2026-01-15 - Add error handling: Log exception but continue with the backup
204
+ * 2026-01-15 - Check permission and hardlink support on destination path
205
+ * 2026-01-14 - Enhance progress bars
206
+ * 2026-01-14 - Add a note about rsync --link-dest
207
+ * 2026-01-14 - Use cli_base.cli_tools.test_utils.base_testcases
200
208
  * [v1.1.0](https://github.com/jedie/PyHardLinkBackup/compare/v1.0.1...v1.1.0)
201
209
  * 2026-01-14 - Change backup timestamp directory to old schema: '%Y-%m-%d-%H%M%S'
202
210
  * 2026-01-14 - Add "Overview of main changes" to README
@@ -212,6 +220,9 @@ Overview of main changes:
212
220
  * 2026-01-13 - Add DocWrite, handle broken symlinks, keep file meta, handle missing hardlink sources
213
221
  * 2026-01-12 - First working iteration with rich progress bar
214
222
  * 2026-01-08 - Rewrite everything
223
+
224
+ <details><summary>Expand older history entries ...</summary>
225
+
215
226
  * [v0.13.0](https://github.com/jedie/PyHardLinkBackup/compare/v0.12.3...v0.13.0)
216
227
  * 2020-03-18 - release v0.13.0
217
228
  * 2020-03-17 - deactivate pypy tests in travis, because of SQLite errors, like:
@@ -230,9 +241,6 @@ Overview of main changes:
230
241
  * 2020-03-17 - dynamic chunk size
231
242
  * 2020-03-17 - ignore *.sha512 by default
232
243
  * 2020-03-17 - Update boot_pyhardlinkbackup.sh
233
-
234
- <details><summary>Expand older history entries ...</summary>
235
-
236
244
  * [v0.12.3](https://github.com/jedie/PyHardLinkBackup/compare/v0.12.2...v0.12.3)
237
245
  * 2020-03-17 - update README.rst
238
246
  * 2020-03-17 - don't publish if tests fail
@@ -9,7 +9,7 @@ authors = [
9
9
  ]
10
10
  requires-python = ">=3.12"
11
11
  dependencies = [
12
- "cli-base-utilities", # https://github.com/jedie/cli-base-utilities
12
+ "cli-base-utilities>=0.27.0", # https://github.com/jedie/cli-base-utilities
13
13
  "bx_py_utils", # https://github.com/boxine/bx_py_utils
14
14
  "tyro", # https://github.com/brentyi/tyro
15
15
  "rich", # https://github.com/Textualize/rich