PyHardLinkBackup 1.1.0__tar.gz → 1.2.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {pyhardlinkbackup-1.1.0 → pyhardlinkbackup-1.2.0}/.pre-commit-config.yaml +1 -1
- {pyhardlinkbackup-1.1.0 → pyhardlinkbackup-1.2.0}/PKG-INFO +13 -5
- {pyhardlinkbackup-1.1.0 → pyhardlinkbackup-1.2.0}/PyHardLinkBackup/__init__.py +1 -1
- pyhardlinkbackup-1.2.0/PyHardLinkBackup/backup.py +252 -0
- {pyhardlinkbackup-1.1.0 → pyhardlinkbackup-1.2.0}/PyHardLinkBackup/tests/test_backup.py +68 -2
- {pyhardlinkbackup-1.1.0 → pyhardlinkbackup-1.2.0}/PyHardLinkBackup/tests/test_doc_write.py +2 -1
- {pyhardlinkbackup-1.1.0 → pyhardlinkbackup-1.2.0}/PyHardLinkBackup/tests/test_readme_history.py +2 -1
- {pyhardlinkbackup-1.1.0 → pyhardlinkbackup-1.2.0}/PyHardLinkBackup/utilities/filesystem.py +26 -1
- {pyhardlinkbackup-1.1.0 → pyhardlinkbackup-1.2.0}/PyHardLinkBackup/utilities/rich_utils.py +6 -5
- {pyhardlinkbackup-1.1.0 → pyhardlinkbackup-1.2.0}/PyHardLinkBackup/utilities/tests/test_file_hash_database.py +1 -1
- {pyhardlinkbackup-1.1.0 → pyhardlinkbackup-1.2.0}/PyHardLinkBackup/utilities/tests/test_file_size_database.py +1 -1
- {pyhardlinkbackup-1.1.0 → pyhardlinkbackup-1.2.0}/PyHardLinkBackup/utilities/tests/test_filesystem.py +34 -2
- {pyhardlinkbackup-1.1.0 → pyhardlinkbackup-1.2.0}/README.md +11 -3
- {pyhardlinkbackup-1.1.0 → pyhardlinkbackup-1.2.0}/pyproject.toml +1 -1
- {pyhardlinkbackup-1.1.0 → pyhardlinkbackup-1.2.0}/uv.lock +80 -80
- pyhardlinkbackup-1.1.0/PyHardLinkBackup/backup.py +0 -229
- pyhardlinkbackup-1.1.0/PyHardLinkBackup/utilities/tests/base_testcases.py +0 -88
- {pyhardlinkbackup-1.1.0 → pyhardlinkbackup-1.2.0}/.editorconfig +0 -0
- {pyhardlinkbackup-1.1.0 → pyhardlinkbackup-1.2.0}/.github/workflows/tests.yml +0 -0
- {pyhardlinkbackup-1.1.0 → pyhardlinkbackup-1.2.0}/.gitignore +0 -0
- {pyhardlinkbackup-1.1.0 → pyhardlinkbackup-1.2.0}/.idea/.gitignore +0 -0
- {pyhardlinkbackup-1.1.0 → pyhardlinkbackup-1.2.0}/.pre-commit-hooks.yaml +0 -0
- {pyhardlinkbackup-1.1.0 → pyhardlinkbackup-1.2.0}/.run/Template Python tests.run.xml +0 -0
- {pyhardlinkbackup-1.1.0 → pyhardlinkbackup-1.2.0}/.run/Unittests - __all__.run.xml +0 -0
- {pyhardlinkbackup-1.1.0 → pyhardlinkbackup-1.2.0}/.run/cli.py --help.run.xml +0 -0
- {pyhardlinkbackup-1.1.0 → pyhardlinkbackup-1.2.0}/.run/dev-cli update.run.xml +0 -0
- {pyhardlinkbackup-1.1.0 → pyhardlinkbackup-1.2.0}/.run/only DocTests.run.xml +0 -0
- {pyhardlinkbackup-1.1.0 → pyhardlinkbackup-1.2.0}/.run/only DocWrite.run.xml +0 -0
- {pyhardlinkbackup-1.1.0 → pyhardlinkbackup-1.2.0}/.venv-app/lib/python3.12/site-packages/cli_base/tests/shell_complete_snapshots/.gitignore +0 -0
- {pyhardlinkbackup-1.1.0 → pyhardlinkbackup-1.2.0}/PyHardLinkBackup/__main__.py +0 -0
- {pyhardlinkbackup-1.1.0 → pyhardlinkbackup-1.2.0}/PyHardLinkBackup/cli_app/__init__.py +0 -0
- {pyhardlinkbackup-1.1.0 → pyhardlinkbackup-1.2.0}/PyHardLinkBackup/cli_app/phlb.py +0 -0
- {pyhardlinkbackup-1.1.0 → pyhardlinkbackup-1.2.0}/PyHardLinkBackup/cli_dev/__init__.py +0 -0
- {pyhardlinkbackup-1.1.0 → pyhardlinkbackup-1.2.0}/PyHardLinkBackup/cli_dev/benchmark.py +0 -0
- {pyhardlinkbackup-1.1.0 → pyhardlinkbackup-1.2.0}/PyHardLinkBackup/cli_dev/code_style.py +0 -0
- {pyhardlinkbackup-1.1.0 → pyhardlinkbackup-1.2.0}/PyHardLinkBackup/cli_dev/packaging.py +0 -0
- {pyhardlinkbackup-1.1.0 → pyhardlinkbackup-1.2.0}/PyHardLinkBackup/cli_dev/shell_completion.py +0 -0
- {pyhardlinkbackup-1.1.0 → pyhardlinkbackup-1.2.0}/PyHardLinkBackup/cli_dev/testing.py +0 -0
- {pyhardlinkbackup-1.1.0 → pyhardlinkbackup-1.2.0}/PyHardLinkBackup/cli_dev/update_readme_history.py +0 -0
- {pyhardlinkbackup-1.1.0 → pyhardlinkbackup-1.2.0}/PyHardLinkBackup/constants.py +0 -0
- {pyhardlinkbackup-1.1.0 → pyhardlinkbackup-1.2.0}/PyHardLinkBackup/tests/__init__.py +0 -0
- {pyhardlinkbackup-1.1.0 → pyhardlinkbackup-1.2.0}/PyHardLinkBackup/tests/test_doctests.py +0 -0
- {pyhardlinkbackup-1.1.0 → pyhardlinkbackup-1.2.0}/PyHardLinkBackup/tests/test_project_setup.py +0 -0
- {pyhardlinkbackup-1.1.0 → pyhardlinkbackup-1.2.0}/PyHardLinkBackup/tests/test_readme.py +0 -0
- {pyhardlinkbackup-1.1.0 → pyhardlinkbackup-1.2.0}/PyHardLinkBackup/utilities/__init__.py +0 -0
- {pyhardlinkbackup-1.1.0 → pyhardlinkbackup-1.2.0}/PyHardLinkBackup/utilities/file_hash_database.py +0 -0
- {pyhardlinkbackup-1.1.0 → pyhardlinkbackup-1.2.0}/PyHardLinkBackup/utilities/file_size_database.py +0 -0
- {pyhardlinkbackup-1.1.0 → pyhardlinkbackup-1.2.0}/PyHardLinkBackup/utilities/humanize.py +0 -0
- {pyhardlinkbackup-1.1.0 → pyhardlinkbackup-1.2.0}/PyHardLinkBackup/utilities/tests/__init__.py +0 -0
- {pyhardlinkbackup-1.1.0 → pyhardlinkbackup-1.2.0}/cli.py +0 -0
- {pyhardlinkbackup-1.1.0 → pyhardlinkbackup-1.2.0}/dev-cli.py +0 -0
- {pyhardlinkbackup-1.1.0 → pyhardlinkbackup-1.2.0}/dist/.gitignore +0 -0
- {pyhardlinkbackup-1.1.0 → pyhardlinkbackup-1.2.0}/docs/README.md +0 -0
- {pyhardlinkbackup-1.1.0 → pyhardlinkbackup-1.2.0}/docs/about-docs.md +0 -0
- {pyhardlinkbackup-1.1.0 → pyhardlinkbackup-1.2.0}/noxfile.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: PyHardLinkBackup
|
|
3
|
-
Version: 1.1.0
|
|
3
|
+
Version: 1.2.0
|
|
4
4
|
Summary: HardLink/Deduplication Backups with Python
|
|
5
5
|
Project-URL: Documentation, https://github.com/jedie/PyHardLinkBackup
|
|
6
6
|
Project-URL: Source, https://github.com/jedie/PyHardLinkBackup
|
|
@@ -8,7 +8,7 @@ Author-email: Jens Diemer <PyHardLinkBackup@jensdiemer.de>
|
|
|
8
8
|
License: GPL-3.0-or-later
|
|
9
9
|
Requires-Python: >=3.12
|
|
10
10
|
Requires-Dist: bx-py-utils
|
|
11
|
-
Requires-Dist: cli-base-utilities
|
|
11
|
+
Requires-Dist: cli-base-utilities>=0.27.0
|
|
12
12
|
Requires-Dist: rich
|
|
13
13
|
Requires-Dist: tyro
|
|
14
14
|
Description-Content-Type: text/markdown
|
|
@@ -25,6 +25,8 @@ HardLink/Deduplication Backups with Python
|
|
|
25
25
|
|
|
26
26
|
**WIP:** v1.0.0 is a complete rewrite of PyHardLinkBackup.
|
|
27
27
|
|
|
28
|
+
It's similar to `rsync --link-dest` but the deduplication is done globally for all backups and all paths.
|
|
29
|
+
|
|
28
30
|
## installation
|
|
29
31
|
|
|
30
32
|
You can use [pipx](https://pipx.pypa.io/stable/installation/) to install and use PyHardLinkBackup, e.g.:
|
|
@@ -212,6 +214,12 @@ Overview of main changes:
|
|
|
212
214
|
|
|
213
215
|
[comment]: <> (✂✂✂ auto generated history start ✂✂✂)
|
|
214
216
|
|
|
217
|
+
* [v1.2.0](https://github.com/jedie/PyHardLinkBackup/compare/v1.1.0...v1.2.0)
|
|
218
|
+
* 2026-01-15 - Add error handling: Log exception but continue with the backup
|
|
219
|
+
* 2026-01-15 - Check permission and hadlink support on destination path
|
|
220
|
+
* 2026-01-14 - Enhance progress bars
|
|
221
|
+
* 2026-01-14 - A a note to rsync --link-dest
|
|
222
|
+
* 2026-01-14 - Use cli_base.cli_tools.test_utils.base_testcases
|
|
215
223
|
* [v1.1.0](https://github.com/jedie/PyHardLinkBackup/compare/v1.0.1...v1.1.0)
|
|
216
224
|
* 2026-01-14 - Change backup timestamp directory to old schema: '%Y-%m-%d-%H%M%S'
|
|
217
225
|
* 2026-01-14 - Add "Overview of main changes" to README
|
|
@@ -227,6 +235,9 @@ Overview of main changes:
|
|
|
227
235
|
* 2026-01-13 - Add DocWrite, handle broken symlinks, keep file meta, handle missing hardlink sources
|
|
228
236
|
* 2026-01-12 - First working iteration with rich progess bar
|
|
229
237
|
* 2026-01-08 - Rewrite everything
|
|
238
|
+
|
|
239
|
+
<details><summary>Expand older history entries ...</summary>
|
|
240
|
+
|
|
230
241
|
* [v0.13.0](https://github.com/jedie/PyHardLinkBackup/compare/v0.12.3...v0.13.0)
|
|
231
242
|
* 2020-03-18 - release v0.13.0
|
|
232
243
|
* 2020-03-17 - deactivate pypy tests in travis, because of SQLite errors, like:
|
|
@@ -245,9 +256,6 @@ Overview of main changes:
|
|
|
245
256
|
* 2020-03-17 - dynamic chunk size
|
|
246
257
|
* 2020-03-17 - ignore *.sha512 by default
|
|
247
258
|
* 2020-03-17 - Update boot_pyhardlinkbackup.sh
|
|
248
|
-
|
|
249
|
-
<details><summary>Expand older history entries ...</summary>
|
|
250
|
-
|
|
251
259
|
* [v0.12.3](https://github.com/jedie/PyHardLinkBackup/compare/v0.12.2...v0.12.3)
|
|
252
260
|
* 2020-03-17 - update README.rst
|
|
253
261
|
* 2020-03-17 - don't publish if tests fail
|
|
@@ -0,0 +1,252 @@
|
|
|
1
|
+
import dataclasses
|
|
2
|
+
import logging
|
|
3
|
+
import os
|
|
4
|
+
import shutil
|
|
5
|
+
import sys
|
|
6
|
+
import time
|
|
7
|
+
from datetime import datetime
|
|
8
|
+
from pathlib import Path
|
|
9
|
+
|
|
10
|
+
from rich import print # noqa
|
|
11
|
+
|
|
12
|
+
from PyHardLinkBackup.constants import CHUNK_SIZE
|
|
13
|
+
from PyHardLinkBackup.utilities.file_hash_database import FileHashDatabase
|
|
14
|
+
from PyHardLinkBackup.utilities.file_size_database import FileSizeDatabase
|
|
15
|
+
from PyHardLinkBackup.utilities.filesystem import (
|
|
16
|
+
copy_and_hash,
|
|
17
|
+
hash_file,
|
|
18
|
+
humanized_fs_scan,
|
|
19
|
+
iter_scandir_files,
|
|
20
|
+
read_and_hash_file,
|
|
21
|
+
supports_hardlinks,
|
|
22
|
+
)
|
|
23
|
+
from PyHardLinkBackup.utilities.humanize import human_filesize
|
|
24
|
+
from PyHardLinkBackup.utilities.rich_utils import BackupProgress
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
logger = logging.getLogger(__name__)
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
@dataclasses.dataclass
|
|
31
|
+
class BackupResult:
|
|
32
|
+
backup_dir: Path
|
|
33
|
+
#
|
|
34
|
+
backup_count: int = 0
|
|
35
|
+
backup_size: int = 0
|
|
36
|
+
#
|
|
37
|
+
symlink_files: int = 0
|
|
38
|
+
hardlinked_files: int = 0
|
|
39
|
+
hardlinked_size: int = 0
|
|
40
|
+
#
|
|
41
|
+
copied_files: int = 0
|
|
42
|
+
copied_size: int = 0
|
|
43
|
+
#
|
|
44
|
+
copied_small_files: int = 0
|
|
45
|
+
copied_small_size: int = 0
|
|
46
|
+
#
|
|
47
|
+
error_count: int = 0
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
def backup_one_file(
|
|
51
|
+
*,
|
|
52
|
+
src_root: Path,
|
|
53
|
+
entry: os.DirEntry,
|
|
54
|
+
size_db: FileSizeDatabase,
|
|
55
|
+
hash_db: FileHashDatabase,
|
|
56
|
+
backup_dir: Path,
|
|
57
|
+
backup_result: BackupResult,
|
|
58
|
+
) -> None:
|
|
59
|
+
backup_result.backup_count += 1
|
|
60
|
+
src_path = Path(entry.path)
|
|
61
|
+
|
|
62
|
+
dst_path = backup_dir / src_path.relative_to(src_root)
|
|
63
|
+
dst_dir_path = dst_path.parent
|
|
64
|
+
if not dst_dir_path.exists():
|
|
65
|
+
dst_dir_path.mkdir(parents=True, exist_ok=False)
|
|
66
|
+
|
|
67
|
+
try:
|
|
68
|
+
size = entry.stat().st_size
|
|
69
|
+
except FileNotFoundError:
|
|
70
|
+
# e.g.: Handle broken symlink
|
|
71
|
+
target = os.readlink(src_path)
|
|
72
|
+
dst_path.symlink_to(target)
|
|
73
|
+
backup_result.symlink_files += 1
|
|
74
|
+
return
|
|
75
|
+
|
|
76
|
+
backup_result.backup_size += size
|
|
77
|
+
|
|
78
|
+
if entry.name == 'SHA256SUMS':
|
|
79
|
+
# Skip existing SHA256SUMS files in source tree,
|
|
80
|
+
# because we create our own SHA256SUMS files.
|
|
81
|
+
logger.debug('Skip existing SHA256SUMS file: %s', src_path)
|
|
82
|
+
return
|
|
83
|
+
|
|
84
|
+
if entry.is_symlink():
|
|
85
|
+
logger.debug('Copy symlink: %s to %s', src_path, dst_path)
|
|
86
|
+
target = os.readlink(src_path)
|
|
87
|
+
dst_path.symlink_to(target)
|
|
88
|
+
backup_result.symlink_files += 1
|
|
89
|
+
return
|
|
90
|
+
|
|
91
|
+
# Process regular files
|
|
92
|
+
assert entry.is_file(follow_symlinks=False), f'Unexpected non-file: {src_path}'
|
|
93
|
+
|
|
94
|
+
# Deduplication logic
|
|
95
|
+
|
|
96
|
+
if size < size_db.MIN_SIZE:
|
|
97
|
+
# Small file -> always copy without deduplication
|
|
98
|
+
logger.info('Copy small file: %s to %s', src_path, dst_path)
|
|
99
|
+
file_hash = copy_and_hash(src_path, dst_path)
|
|
100
|
+
backup_result.copied_files += 1
|
|
101
|
+
backup_result.copied_size += size
|
|
102
|
+
backup_result.copied_small_files += 1
|
|
103
|
+
backup_result.copied_small_size += size
|
|
104
|
+
store_hash(dst_path, file_hash)
|
|
105
|
+
return
|
|
106
|
+
|
|
107
|
+
if size in size_db:
|
|
108
|
+
logger.debug('File with size %iBytes found before -> hash: %s', size, src_path)
|
|
109
|
+
|
|
110
|
+
if size <= CHUNK_SIZE:
|
|
111
|
+
# File can be read complete into memory
|
|
112
|
+
logger.debug('File size %iBytes <= CHUNK_SIZE (%iBytes) -> read complete into memory', size, CHUNK_SIZE)
|
|
113
|
+
file_content, file_hash = read_and_hash_file(src_path)
|
|
114
|
+
if existing_path := hash_db.get(file_hash):
|
|
115
|
+
logger.info('Hardlink duplicate file: %s to %s', dst_path, existing_path)
|
|
116
|
+
os.link(existing_path, dst_path)
|
|
117
|
+
backup_result.hardlinked_files += 1
|
|
118
|
+
backup_result.hardlinked_size += size
|
|
119
|
+
else:
|
|
120
|
+
logger.info('Store unique file: %s to %s', src_path, dst_path)
|
|
121
|
+
dst_path.write_bytes(file_content)
|
|
122
|
+
hash_db[file_hash] = dst_path
|
|
123
|
+
backup_result.copied_files += 1
|
|
124
|
+
backup_result.copied_size += size
|
|
125
|
+
|
|
126
|
+
else:
|
|
127
|
+
# Large file
|
|
128
|
+
file_hash = hash_file(src_path) # Calculate hash without copying
|
|
129
|
+
|
|
130
|
+
if existing_path := hash_db.get(file_hash):
|
|
131
|
+
logger.info('Hardlink duplicate file: %s to %s', dst_path, existing_path)
|
|
132
|
+
os.link(existing_path, dst_path)
|
|
133
|
+
backup_result.hardlinked_files += 1
|
|
134
|
+
backup_result.hardlinked_size += size
|
|
135
|
+
else:
|
|
136
|
+
logger.info('Copy unique file: %s to %s', src_path, dst_path)
|
|
137
|
+
hash_db[file_hash] = dst_path
|
|
138
|
+
backup_result.copied_files += 1
|
|
139
|
+
backup_result.copied_size += size
|
|
140
|
+
|
|
141
|
+
# Keep original file metadata (permission bits, time stamps, and flags)
|
|
142
|
+
shutil.copy2(src_path, dst_path)
|
|
143
|
+
else:
|
|
144
|
+
# A file with this size not backuped before -> Can't be duplicate -> copy and hash
|
|
145
|
+
file_hash = copy_and_hash(src_path, dst_path)
|
|
146
|
+
size_db.add(size)
|
|
147
|
+
hash_db[file_hash] = dst_path
|
|
148
|
+
backup_result.copied_files += 1
|
|
149
|
+
backup_result.copied_size += size
|
|
150
|
+
|
|
151
|
+
store_hash(dst_path, file_hash)
|
|
152
|
+
|
|
153
|
+
|
|
154
|
+
def store_hash(file_path: Path, file_hash: str):
|
|
155
|
+
"""DocWrite: README.md ## SHA256SUMS
|
|
156
|
+
A `SHA256SUMS` file is stored in each backup directory containing the SHA256 hashes of all files in that directory.
|
|
157
|
+
It's the same format as e.g.: `sha256sum * > SHA256SUMS` command produces.
|
|
158
|
+
So it's possible to verify the integrity of the backup files later.
|
|
159
|
+
e.g.:
|
|
160
|
+
```bash
|
|
161
|
+
cd .../your/backup/foobar/20240101_120000/
|
|
162
|
+
sha256sum -c SHA256SUMS
|
|
163
|
+
```
|
|
164
|
+
"""
|
|
165
|
+
hash_file_path = file_path.parent / 'SHA256SUMS'
|
|
166
|
+
with hash_file_path.open('a') as f:
|
|
167
|
+
f.write(f'{file_hash} {file_path.name}\n')
|
|
168
|
+
|
|
169
|
+
|
|
170
|
+
def backup_tree(*, src_root: Path, backup_root: Path, excludes: set[str]) -> BackupResult:
|
|
171
|
+
src_root = src_root.resolve()
|
|
172
|
+
if not src_root.is_dir():
|
|
173
|
+
print('Error: Source directory does not exist!')
|
|
174
|
+
print(f'Please check source directory: "{src_root}"\n')
|
|
175
|
+
sys.exit(1)
|
|
176
|
+
|
|
177
|
+
backup_root = backup_root.resolve()
|
|
178
|
+
if not backup_root.is_dir():
|
|
179
|
+
print('Error: Backup directory does not exist!')
|
|
180
|
+
print(f'Please create "{backup_root}" directory first and start again!\n')
|
|
181
|
+
sys.exit(1)
|
|
182
|
+
|
|
183
|
+
if not os.access(backup_root, os.W_OK):
|
|
184
|
+
print('Error: No write access to backup directory!')
|
|
185
|
+
print(f'Please check permissions for backup directory: "{backup_root}"\n')
|
|
186
|
+
sys.exit(1)
|
|
187
|
+
|
|
188
|
+
if not supports_hardlinks(backup_root):
|
|
189
|
+
print('Error: Filesystem for backup directory does not support hardlinks!')
|
|
190
|
+
print(f'Please check backup directory: "{backup_root}"\n')
|
|
191
|
+
sys.exit(1)
|
|
192
|
+
|
|
193
|
+
# Step 1: Scan source directory:
|
|
194
|
+
src_file_count, src_total_size = humanized_fs_scan(src_root, excludes)
|
|
195
|
+
|
|
196
|
+
phlb_conf_dir = backup_root / '.phlb'
|
|
197
|
+
phlb_conf_dir.mkdir(parents=False, exist_ok=True)
|
|
198
|
+
|
|
199
|
+
backup_dir = backup_root / src_root.name / datetime.now().strftime('%Y-%m-%d-%H%M%S')
|
|
200
|
+
logger.info('Backup %s to %s', src_root, backup_dir)
|
|
201
|
+
backup_dir.mkdir(parents=True, exist_ok=False)
|
|
202
|
+
|
|
203
|
+
print(f'\nBackup to {backup_dir}...\n')
|
|
204
|
+
|
|
205
|
+
with BackupProgress(src_file_count, src_total_size) as progress:
|
|
206
|
+
# "Databases" for deduplication
|
|
207
|
+
size_db = FileSizeDatabase(phlb_conf_dir)
|
|
208
|
+
hash_db = FileHashDatabase(backup_root, phlb_conf_dir)
|
|
209
|
+
|
|
210
|
+
backup_result = BackupResult(backup_dir=backup_dir)
|
|
211
|
+
|
|
212
|
+
next_update = 0
|
|
213
|
+
for entry in iter_scandir_files(src_root, excludes=excludes):
|
|
214
|
+
try:
|
|
215
|
+
backup_one_file(
|
|
216
|
+
src_root=src_root,
|
|
217
|
+
entry=entry,
|
|
218
|
+
size_db=size_db,
|
|
219
|
+
hash_db=hash_db,
|
|
220
|
+
backup_dir=backup_dir,
|
|
221
|
+
backup_result=backup_result,
|
|
222
|
+
)
|
|
223
|
+
except Exception as err:
|
|
224
|
+
logger.exception(f'Backup {entry.path} {err.__class__.__name__}: {err}')
|
|
225
|
+
backup_result.error_count += 1
|
|
226
|
+
else:
|
|
227
|
+
now = time.monotonic()
|
|
228
|
+
if now >= next_update:
|
|
229
|
+
progress.update(backup_count=backup_result.backup_count, backup_size=backup_result.backup_size)
|
|
230
|
+
next_update = now + 0.5
|
|
231
|
+
|
|
232
|
+
# Finalize progress indicator values:
|
|
233
|
+
progress.update(backup_count=backup_result.backup_count, backup_size=backup_result.backup_size)
|
|
234
|
+
|
|
235
|
+
print(f'\nBackup complete: {backup_dir} (total size {human_filesize(backup_result.backup_size)})\n')
|
|
236
|
+
print(f' Total files processed: {backup_result.backup_count}')
|
|
237
|
+
print(f' * Symlinked files: {backup_result.symlink_files}')
|
|
238
|
+
print(
|
|
239
|
+
f' * Hardlinked files: {backup_result.hardlinked_files}'
|
|
240
|
+
f' (saved {human_filesize(backup_result.hardlinked_size)})'
|
|
241
|
+
)
|
|
242
|
+
print(f' * Copied files: {backup_result.copied_files} (total {human_filesize(backup_result.copied_size)})')
|
|
243
|
+
print(
|
|
244
|
+
f' of which small (<{size_db.MIN_SIZE} Bytes)'
|
|
245
|
+
f' files: {backup_result.copied_small_files}'
|
|
246
|
+
f' (total {human_filesize(backup_result.copied_small_size)})'
|
|
247
|
+
)
|
|
248
|
+
if backup_result.error_count > 0:
|
|
249
|
+
print(f' Errors during backup: {backup_result.error_count} (see log for details)')
|
|
250
|
+
print()
|
|
251
|
+
|
|
252
|
+
return backup_result
|
|
@@ -13,14 +13,14 @@ from bx_py_utils.test_utils.assertion import assert_text_equal
|
|
|
13
13
|
from bx_py_utils.test_utils.datetime import parse_dt
|
|
14
14
|
from bx_py_utils.test_utils.log_utils import NoLogs
|
|
15
15
|
from bx_py_utils.test_utils.redirect import RedirectOut
|
|
16
|
+
from cli_base.cli_tools.test_utils.base_testcases import BaseTestCase
|
|
16
17
|
from freezegun import freeze_time
|
|
17
18
|
from tabulate import tabulate
|
|
18
19
|
|
|
19
20
|
from PyHardLinkBackup.backup import BackupResult, backup_tree
|
|
20
21
|
from PyHardLinkBackup.constants import CHUNK_SIZE
|
|
21
22
|
from PyHardLinkBackup.utilities.file_size_database import FileSizeDatabase
|
|
22
|
-
from PyHardLinkBackup.utilities.filesystem import iter_scandir_files
|
|
23
|
-
from PyHardLinkBackup.utilities.tests.base_testcases import BaseTestCase
|
|
23
|
+
from PyHardLinkBackup.utilities.filesystem import copy_and_hash, iter_scandir_files
|
|
24
24
|
from PyHardLinkBackup.utilities.tests.test_file_hash_database import assert_hash_db_info
|
|
25
25
|
|
|
26
26
|
|
|
@@ -184,6 +184,7 @@ class BackupTreeTestCase(BaseTestCase):
|
|
|
184
184
|
copied_size=67109915,
|
|
185
185
|
copied_small_files=3,
|
|
186
186
|
copied_small_size=50,
|
|
187
|
+
error_count=0,
|
|
187
188
|
),
|
|
188
189
|
)
|
|
189
190
|
|
|
@@ -267,6 +268,7 @@ class BackupTreeTestCase(BaseTestCase):
|
|
|
267
268
|
copied_size=50,
|
|
268
269
|
copied_small_files=3,
|
|
269
270
|
copied_small_size=50,
|
|
271
|
+
error_count=0,
|
|
270
272
|
),
|
|
271
273
|
)
|
|
272
274
|
# The second backup:
|
|
@@ -360,6 +362,7 @@ class BackupTreeTestCase(BaseTestCase):
|
|
|
360
362
|
copied_size=1050,
|
|
361
363
|
copied_small_files=3,
|
|
362
364
|
copied_small_size=50,
|
|
365
|
+
error_count=0,
|
|
363
366
|
),
|
|
364
367
|
)
|
|
365
368
|
|
|
@@ -454,6 +457,7 @@ class BackupTreeTestCase(BaseTestCase):
|
|
|
454
457
|
copied_size=31,
|
|
455
458
|
copied_small_files=1,
|
|
456
459
|
copied_small_size=31,
|
|
460
|
+
error_count=0,
|
|
457
461
|
),
|
|
458
462
|
)
|
|
459
463
|
|
|
@@ -474,3 +478,65 @@ class BackupTreeTestCase(BaseTestCase):
|
|
|
474
478
|
Symlinks are not stored in our FileHashDatabase, because they are not considered for hardlinking."""
|
|
475
479
|
with self.assertLogs('PyHardLinkBackup', level=logging.DEBUG):
|
|
476
480
|
assert_hash_db_info(backup_root=backup_root, expected='')
|
|
481
|
+
|
|
482
|
+
def test_error_handling(self):
|
|
483
|
+
with tempfile.TemporaryDirectory() as temp_dir:
|
|
484
|
+
temp_path = Path(temp_dir)
|
|
485
|
+
|
|
486
|
+
src_root = temp_path / 'source'
|
|
487
|
+
backup_root = temp_path / 'backup'
|
|
488
|
+
|
|
489
|
+
src_root.mkdir()
|
|
490
|
+
backup_root.mkdir()
|
|
491
|
+
|
|
492
|
+
(src_root / 'file1.txt').write_text('File 1')
|
|
493
|
+
(src_root / 'file2.txt').write_text('File 2')
|
|
494
|
+
(src_root / 'file3.txt').write_text('File 3')
|
|
495
|
+
|
|
496
|
+
# Set modification times to a fixed time for easier testing:
|
|
497
|
+
set_file_times(src_root, dt=parse_dt('2026-01-01T12:00:00+0000'))
|
|
498
|
+
|
|
499
|
+
def mocked_copy_and_hash(src: Path, dst: Path):
|
|
500
|
+
if src.name == 'file2.txt':
|
|
501
|
+
raise PermissionError('Bam!')
|
|
502
|
+
else:
|
|
503
|
+
return copy_and_hash(src, dst)
|
|
504
|
+
|
|
505
|
+
with (
|
|
506
|
+
self.assertLogs(level=logging.ERROR) as logs,
|
|
507
|
+
patch('PyHardLinkBackup.backup.iter_scandir_files', SortedIterScandirFiles),
|
|
508
|
+
patch('PyHardLinkBackup.backup.copy_and_hash', mocked_copy_and_hash),
|
|
509
|
+
freeze_time('2026-01-01T12:34:56Z', auto_tick_seconds=0),
|
|
510
|
+
RedirectOut() as redirected_out,
|
|
511
|
+
):
|
|
512
|
+
result = backup_tree(
|
|
513
|
+
src_root=src_root,
|
|
514
|
+
backup_root=backup_root,
|
|
515
|
+
excludes={'.cache'},
|
|
516
|
+
)
|
|
517
|
+
self.assertEqual(redirected_out.stderr, '')
|
|
518
|
+
self.assertIn('Backup complete', redirected_out.stdout)
|
|
519
|
+
self.assertIn('Errors during backup:', redirected_out.stdout)
|
|
520
|
+
|
|
521
|
+
logs = ''.join(logs.output)
|
|
522
|
+
self.assertIn(
|
|
523
|
+
f'ERROR:PyHardLinkBackup.backup:Backup {src_root / "file2.txt"} PermissionError: Bam!\n',
|
|
524
|
+
logs,
|
|
525
|
+
)
|
|
526
|
+
self.assertIn('\nTraceback (most recent call last):\n', logs)
|
|
527
|
+
self.assertEqual(
|
|
528
|
+
result,
|
|
529
|
+
BackupResult(
|
|
530
|
+
backup_dir=result.backup_dir,
|
|
531
|
+
backup_count=3,
|
|
532
|
+
backup_size=18,
|
|
533
|
+
symlink_files=0,
|
|
534
|
+
hardlinked_files=0,
|
|
535
|
+
hardlinked_size=0,
|
|
536
|
+
copied_files=2,
|
|
537
|
+
copied_size=12,
|
|
538
|
+
copied_small_files=2,
|
|
539
|
+
copied_small_size=12,
|
|
540
|
+
error_count=1,
|
|
541
|
+
),
|
|
542
|
+
)
|
|
@@ -19,7 +19,8 @@ class DocuWriteApiTestCase(TestCase):
|
|
|
19
19
|
"""
|
|
20
20
|
assert_is_file(PACKAGE_ROOT / 'pyproject.toml')
|
|
21
21
|
|
|
22
|
-
|
|
22
|
+
with self.assertLogs():
|
|
23
|
+
info: GeneratedInfo = generate(base_path=PACKAGE_ROOT)
|
|
23
24
|
self.assertGreaterEqual(len(info.paths), 1)
|
|
24
25
|
self.assertEqual(info.update_count, 0, 'No files should be updated, commit the changes')
|
|
25
26
|
self.assertEqual(info.remove_count, 0, 'No files should be removed, commit the changes')
|
|
@@ -6,6 +6,7 @@ import time
|
|
|
6
6
|
from pathlib import Path
|
|
7
7
|
from typing import Iterable
|
|
8
8
|
|
|
9
|
+
from bx_py_utils.path import assert_is_dir
|
|
9
10
|
from rich.progress import (
|
|
10
11
|
Progress,
|
|
11
12
|
SpinnerColumn,
|
|
@@ -80,8 +81,10 @@ def humanized_fs_scan(path: Path, excludes: set[str]) -> tuple[int, int]:
|
|
|
80
81
|
'{task.description}',
|
|
81
82
|
SpinnerColumn('simpleDots'),
|
|
82
83
|
TextColumn('[green]{task.fields[file_count]} Files'),
|
|
84
|
+
'|',
|
|
83
85
|
HumanFileSizeColumn(field_name='total_size'),
|
|
84
|
-
|
|
86
|
+
'|',
|
|
87
|
+
TextColumn('[cyan]{task.fields[files_per_sec]} Files/sec'),
|
|
85
88
|
)
|
|
86
89
|
|
|
87
90
|
file_count = 0
|
|
@@ -131,3 +134,25 @@ def humanized_fs_scan(path: Path, excludes: set[str]) -> tuple[int, int]:
|
|
|
131
134
|
)
|
|
132
135
|
|
|
133
136
|
return file_count, total_size
|
|
137
|
+
|
|
138
|
+
|
|
139
|
+
def supports_hardlinks(directory: Path) -> bool:
|
|
140
|
+
logger.debug('Checking hardlink support in %s', directory)
|
|
141
|
+
assert_is_dir(directory)
|
|
142
|
+
test_src_file = directory / '.phlb_test'
|
|
143
|
+
test_dst_file = directory / '.phlb_test_link'
|
|
144
|
+
hardlinks_supported = False
|
|
145
|
+
try:
|
|
146
|
+
test_src_file.write_text('test')
|
|
147
|
+
os.link(test_src_file, test_dst_file)
|
|
148
|
+
assert test_dst_file.read_text() == 'test'
|
|
149
|
+
hardlinks_supported = True
|
|
150
|
+
except OSError as err:
|
|
151
|
+
# e.g.: FAT/exFAT filesystems ;)
|
|
152
|
+
logger.exception('Hardlink test failed in %s: %s', directory, err)
|
|
153
|
+
finally:
|
|
154
|
+
test_src_file.unlink(missing_ok=True)
|
|
155
|
+
test_dst_file.unlink(missing_ok=True)
|
|
156
|
+
|
|
157
|
+
logger.info('Hardlink support in %s: %s', directory, hardlinks_supported)
|
|
158
|
+
return hardlinks_supported
|
|
@@ -29,13 +29,14 @@ class HumanFileSizeColumn(ProgressColumn):
|
|
|
29
29
|
file_size = task.fields[self.field_name]
|
|
30
30
|
except KeyError:
|
|
31
31
|
raise KeyError(f'Field {self.field_name=} not found in: {task.fields.keys()=}') from None
|
|
32
|
-
return Text(
|
|
32
|
+
return Text(human_filesize(file_size))
|
|
33
33
|
|
|
34
34
|
|
|
35
35
|
class BackupProgress:
|
|
36
36
|
def __init__(self, src_file_count: int, src_total_size: int):
|
|
37
|
+
percentage_format = '[progress.percentage]{task.percentage:>3.1f}%'
|
|
37
38
|
self.overall_progress = Progress(
|
|
38
|
-
TaskProgressColumn(),
|
|
39
|
+
TaskProgressColumn(text_format=percentage_format),
|
|
39
40
|
BarColumn(bar_width=50),
|
|
40
41
|
TextColumn('Elapsed:'),
|
|
41
42
|
TimeElapsedColumn(),
|
|
@@ -45,15 +46,15 @@ class BackupProgress:
|
|
|
45
46
|
self.overall_progress_task_id = self.overall_progress.add_task(description='', total=100)
|
|
46
47
|
|
|
47
48
|
self.file_count_progress = Progress(
|
|
48
|
-
TaskProgressColumn(),
|
|
49
|
+
TaskProgressColumn(text_format=percentage_format),
|
|
49
50
|
BarColumn(bar_width=50),
|
|
50
|
-
TextColumn('{task.completed} Files'),
|
|
51
|
+
TextColumn('{task.completed}/{task.total} Files'),
|
|
51
52
|
)
|
|
52
53
|
self.file_count_progress_task_id = self.file_count_progress.add_task(description='', total=src_file_count)
|
|
53
54
|
self.file_count_progress_task = self.file_count_progress.tasks[0]
|
|
54
55
|
|
|
55
56
|
self.file_size_progress = Progress(
|
|
56
|
-
TaskProgressColumn(),
|
|
57
|
+
TaskProgressColumn(text_format=percentage_format),
|
|
57
58
|
BarColumn(bar_width=50),
|
|
58
59
|
HumanFileSizeColumn(),
|
|
59
60
|
'|',
|
|
@@ -6,10 +6,10 @@ from pathlib import Path
|
|
|
6
6
|
from bx_py_utils.path import assert_is_dir
|
|
7
7
|
from bx_py_utils.test_utils.assertion import assert_text_equal
|
|
8
8
|
from bx_py_utils.test_utils.log_utils import NoLogs
|
|
9
|
+
from cli_base.cli_tools.test_utils.base_testcases import BaseTestCase
|
|
9
10
|
|
|
10
11
|
from PyHardLinkBackup.utilities.file_hash_database import FileHashDatabase, HashAlreadyExistsError
|
|
11
12
|
from PyHardLinkBackup.utilities.filesystem import iter_scandir_files
|
|
12
|
-
from PyHardLinkBackup.utilities.tests.base_testcases import BaseTestCase
|
|
13
13
|
|
|
14
14
|
|
|
15
15
|
class TemporaryFileHashDatabase(tempfile.TemporaryDirectory):
|
|
@@ -4,10 +4,10 @@ from collections.abc import Iterable
|
|
|
4
4
|
from pathlib import Path
|
|
5
5
|
|
|
6
6
|
from bx_py_utils.test_utils.log_utils import NoLogs
|
|
7
|
+
from cli_base.cli_tools.test_utils.base_testcases import BaseTestCase
|
|
7
8
|
|
|
8
9
|
from PyHardLinkBackup.utilities.file_size_database import FileSizeDatabase
|
|
9
10
|
from PyHardLinkBackup.utilities.filesystem import iter_scandir_files
|
|
10
|
-
from PyHardLinkBackup.utilities.tests.base_testcases import BaseTestCase
|
|
11
11
|
|
|
12
12
|
|
|
13
13
|
class TemporaryFileSizeDatabase(tempfile.TemporaryDirectory):
|
|
@@ -1,14 +1,25 @@
|
|
|
1
1
|
import hashlib
|
|
2
|
+
import logging
|
|
2
3
|
import os
|
|
3
4
|
import tempfile
|
|
4
5
|
from pathlib import Path
|
|
6
|
+
from unittest.mock import patch
|
|
7
|
+
|
|
8
|
+
from cli_base.cli_tools.test_utils.base_testcases import BaseTestCase
|
|
5
9
|
|
|
6
10
|
from PyHardLinkBackup.constants import HASH_ALGO
|
|
7
|
-
from PyHardLinkBackup.utilities.filesystem import
|
|
8
|
-
|
|
11
|
+
from PyHardLinkBackup.utilities.filesystem import (
|
|
12
|
+
copy_and_hash,
|
|
13
|
+
hash_file,
|
|
14
|
+
iter_scandir_files,
|
|
15
|
+
read_and_hash_file,
|
|
16
|
+
supports_hardlinks,
|
|
17
|
+
)
|
|
9
18
|
|
|
10
19
|
|
|
11
20
|
class TestHashFile(BaseTestCase):
|
|
21
|
+
maxDiff = None
|
|
22
|
+
|
|
12
23
|
def test_hash_file(self):
|
|
13
24
|
self.assertEqual(
|
|
14
25
|
hashlib.new(HASH_ALGO, b'test content').hexdigest(),
|
|
@@ -92,3 +103,24 @@ class TestHashFile(BaseTestCase):
|
|
|
92
103
|
logs = ''.join(logs.output)
|
|
93
104
|
self.assertIn('Scanning directory ', logs)
|
|
94
105
|
self.assertIn('Excluding directory ', logs)
|
|
106
|
+
|
|
107
|
+
def test_supports_hardlinks(self):
|
|
108
|
+
with tempfile.TemporaryDirectory() as temp:
|
|
109
|
+
with self.assertLogs(level=logging.INFO) as logs:
|
|
110
|
+
self.assertTrue(supports_hardlinks(Path(temp)))
|
|
111
|
+
self.assertEqual(
|
|
112
|
+
''.join(logs.output),
|
|
113
|
+
f'INFO:PyHardLinkBackup.utilities.filesystem:Hardlink support in {temp}: True',
|
|
114
|
+
)
|
|
115
|
+
|
|
116
|
+
with (
|
|
117
|
+
self.assertLogs(level=logging.ERROR) as logs,
|
|
118
|
+
patch('PyHardLinkBackup.utilities.filesystem.os.link', side_effect=OSError),
|
|
119
|
+
):
|
|
120
|
+
self.assertFalse(supports_hardlinks(Path(temp)))
|
|
121
|
+
logs = ''.join(logs.output)
|
|
122
|
+
self.assertIn(f'Hardlink test failed in {temp}:', logs)
|
|
123
|
+
self.assertIn('OSError', logs)
|
|
124
|
+
|
|
125
|
+
with self.assertLogs(level=logging.DEBUG), self.assertRaises(NotADirectoryError):
|
|
126
|
+
supports_hardlinks(Path('/not/existing/directory'))
|
|
@@ -10,6 +10,8 @@ HardLink/Deduplication Backups with Python
|
|
|
10
10
|
|
|
11
11
|
**WIP:** v1.0.0 is a complete rewrite of PyHardLinkBackup.
|
|
12
12
|
|
|
13
|
+
It's similar to `rsync --link-dest` but the deduplication is done globally for all backups and all paths.
|
|
14
|
+
|
|
13
15
|
## installation
|
|
14
16
|
|
|
15
17
|
You can use [pipx](https://pipx.pypa.io/stable/installation/) to install and use PyHardLinkBackup, e.g.:
|
|
@@ -197,6 +199,12 @@ Overview of main changes:
|
|
|
197
199
|
|
|
198
200
|
[comment]: <> (✂✂✂ auto generated history start ✂✂✂)
|
|
199
201
|
|
|
202
|
+
* [v1.2.0](https://github.com/jedie/PyHardLinkBackup/compare/v1.1.0...v1.2.0)
|
|
203
|
+
* 2026-01-15 - Add error handling: Log exception but continue with the backup
|
|
204
|
+
* 2026-01-15 - Check permission and hadlink support on destination path
|
|
205
|
+
* 2026-01-14 - Enhance progress bars
|
|
206
|
+
* 2026-01-14 - A a note to rsync --link-dest
|
|
207
|
+
* 2026-01-14 - Use cli_base.cli_tools.test_utils.base_testcases
|
|
200
208
|
* [v1.1.0](https://github.com/jedie/PyHardLinkBackup/compare/v1.0.1...v1.1.0)
|
|
201
209
|
* 2026-01-14 - Change backup timestamp directory to old schema: '%Y-%m-%d-%H%M%S'
|
|
202
210
|
* 2026-01-14 - Add "Overview of main changes" to README
|
|
@@ -212,6 +220,9 @@ Overview of main changes:
|
|
|
212
220
|
* 2026-01-13 - Add DocWrite, handle broken symlinks, keep file meta, handle missing hardlink sources
|
|
213
221
|
* 2026-01-12 - First working iteration with rich progess bar
|
|
214
222
|
* 2026-01-08 - Rewrite everything
|
|
223
|
+
|
|
224
|
+
<details><summary>Expand older history entries ...</summary>
|
|
225
|
+
|
|
215
226
|
* [v0.13.0](https://github.com/jedie/PyHardLinkBackup/compare/v0.12.3...v0.13.0)
|
|
216
227
|
* 2020-03-18 - release v0.13.0
|
|
217
228
|
* 2020-03-17 - deactivate pypy tests in travis, because of SQLite errors, like:
|
|
@@ -230,9 +241,6 @@ Overview of main changes:
|
|
|
230
241
|
* 2020-03-17 - dynamic chunk size
|
|
231
242
|
* 2020-03-17 - ignore *.sha512 by default
|
|
232
243
|
* 2020-03-17 - Update boot_pyhardlinkbackup.sh
|
|
233
|
-
|
|
234
|
-
<details><summary>Expand older history entries ...</summary>
|
|
235
|
-
|
|
236
244
|
* [v0.12.3](https://github.com/jedie/PyHardLinkBackup/compare/v0.12.2...v0.12.3)
|
|
237
245
|
* 2020-03-17 - update README.rst
|
|
238
246
|
* 2020-03-17 - don't publish if tests fail
|
|
@@ -9,7 +9,7 @@ authors = [
|
|
|
9
9
|
]
|
|
10
10
|
requires-python = ">=3.12"
|
|
11
11
|
dependencies = [
|
|
12
|
-
"cli-base-utilities", # https://github.com/jedie/cli-base-utilities
|
|
12
|
+
"cli-base-utilities>=0.27.0", # https://github.com/jedie/cli-base-utilities
|
|
13
13
|
"bx_py_utils", # https://github.com/boxine/bx_py_utils
|
|
14
14
|
"tyro", # https://github.com/brentyi/tyro
|
|
15
15
|
"rich", # https://github.com/Textualize/rich
|