PyHardLinkBackup 1.0.1__tar.gz → 1.2.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {pyhardlinkbackup-1.0.1 → pyhardlinkbackup-1.2.0}/.pre-commit-config.yaml +1 -1
- pyhardlinkbackup-1.0.1/README.md → pyhardlinkbackup-1.2.0/PKG-INFO +37 -3
- {pyhardlinkbackup-1.0.1 → pyhardlinkbackup-1.2.0}/PyHardLinkBackup/__init__.py +1 -1
- pyhardlinkbackup-1.2.0/PyHardLinkBackup/backup.py +252 -0
- {pyhardlinkbackup-1.0.1 → pyhardlinkbackup-1.2.0}/PyHardLinkBackup/tests/test_backup.py +201 -110
- {pyhardlinkbackup-1.0.1 → pyhardlinkbackup-1.2.0}/PyHardLinkBackup/tests/test_doc_write.py +2 -1
- {pyhardlinkbackup-1.0.1 → pyhardlinkbackup-1.2.0}/PyHardLinkBackup/tests/test_readme_history.py +2 -1
- {pyhardlinkbackup-1.0.1 → pyhardlinkbackup-1.2.0}/PyHardLinkBackup/utilities/filesystem.py +26 -1
- {pyhardlinkbackup-1.0.1 → pyhardlinkbackup-1.2.0}/PyHardLinkBackup/utilities/rich_utils.py +6 -5
- {pyhardlinkbackup-1.0.1 → pyhardlinkbackup-1.2.0}/PyHardLinkBackup/utilities/tests/test_file_hash_database.py +40 -35
- {pyhardlinkbackup-1.0.1 → pyhardlinkbackup-1.2.0}/PyHardLinkBackup/utilities/tests/test_file_size_database.py +48 -41
- {pyhardlinkbackup-1.0.1 → pyhardlinkbackup-1.2.0}/PyHardLinkBackup/utilities/tests/test_filesystem.py +35 -3
- pyhardlinkbackup-1.0.1/PKG-INFO → pyhardlinkbackup-1.2.0/README.md +22 -18
- {pyhardlinkbackup-1.0.1 → pyhardlinkbackup-1.2.0}/pyproject.toml +1 -1
- {pyhardlinkbackup-1.0.1 → pyhardlinkbackup-1.2.0}/uv.lock +80 -80
- pyhardlinkbackup-1.0.1/PyHardLinkBackup/backup.py +0 -229
- {pyhardlinkbackup-1.0.1 → pyhardlinkbackup-1.2.0}/.editorconfig +0 -0
- {pyhardlinkbackup-1.0.1 → pyhardlinkbackup-1.2.0}/.github/workflows/tests.yml +0 -0
- {pyhardlinkbackup-1.0.1 → pyhardlinkbackup-1.2.0}/.gitignore +0 -0
- {pyhardlinkbackup-1.0.1 → pyhardlinkbackup-1.2.0}/.idea/.gitignore +0 -0
- {pyhardlinkbackup-1.0.1 → pyhardlinkbackup-1.2.0}/.pre-commit-hooks.yaml +0 -0
- {pyhardlinkbackup-1.0.1 → pyhardlinkbackup-1.2.0}/.run/Template Python tests.run.xml +0 -0
- {pyhardlinkbackup-1.0.1 → pyhardlinkbackup-1.2.0}/.run/Unittests - __all__.run.xml +0 -0
- {pyhardlinkbackup-1.0.1 → pyhardlinkbackup-1.2.0}/.run/cli.py --help.run.xml +0 -0
- {pyhardlinkbackup-1.0.1 → pyhardlinkbackup-1.2.0}/.run/dev-cli update.run.xml +0 -0
- {pyhardlinkbackup-1.0.1 → pyhardlinkbackup-1.2.0}/.run/only DocTests.run.xml +0 -0
- {pyhardlinkbackup-1.0.1 → pyhardlinkbackup-1.2.0}/.run/only DocWrite.run.xml +0 -0
- {pyhardlinkbackup-1.0.1 → pyhardlinkbackup-1.2.0}/.venv-app/lib/python3.12/site-packages/cli_base/tests/shell_complete_snapshots/.gitignore +0 -0
- {pyhardlinkbackup-1.0.1 → pyhardlinkbackup-1.2.0}/PyHardLinkBackup/__main__.py +0 -0
- {pyhardlinkbackup-1.0.1 → pyhardlinkbackup-1.2.0}/PyHardLinkBackup/cli_app/__init__.py +0 -0
- {pyhardlinkbackup-1.0.1 → pyhardlinkbackup-1.2.0}/PyHardLinkBackup/cli_app/phlb.py +0 -0
- {pyhardlinkbackup-1.0.1 → pyhardlinkbackup-1.2.0}/PyHardLinkBackup/cli_dev/__init__.py +0 -0
- {pyhardlinkbackup-1.0.1 → pyhardlinkbackup-1.2.0}/PyHardLinkBackup/cli_dev/benchmark.py +0 -0
- {pyhardlinkbackup-1.0.1 → pyhardlinkbackup-1.2.0}/PyHardLinkBackup/cli_dev/code_style.py +0 -0
- {pyhardlinkbackup-1.0.1 → pyhardlinkbackup-1.2.0}/PyHardLinkBackup/cli_dev/packaging.py +0 -0
- {pyhardlinkbackup-1.0.1 → pyhardlinkbackup-1.2.0}/PyHardLinkBackup/cli_dev/shell_completion.py +0 -0
- {pyhardlinkbackup-1.0.1 → pyhardlinkbackup-1.2.0}/PyHardLinkBackup/cli_dev/testing.py +0 -0
- {pyhardlinkbackup-1.0.1 → pyhardlinkbackup-1.2.0}/PyHardLinkBackup/cli_dev/update_readme_history.py +0 -0
- {pyhardlinkbackup-1.0.1 → pyhardlinkbackup-1.2.0}/PyHardLinkBackup/constants.py +0 -0
- {pyhardlinkbackup-1.0.1 → pyhardlinkbackup-1.2.0}/PyHardLinkBackup/tests/__init__.py +0 -0
- {pyhardlinkbackup-1.0.1 → pyhardlinkbackup-1.2.0}/PyHardLinkBackup/tests/test_doctests.py +0 -0
- {pyhardlinkbackup-1.0.1 → pyhardlinkbackup-1.2.0}/PyHardLinkBackup/tests/test_project_setup.py +0 -0
- {pyhardlinkbackup-1.0.1 → pyhardlinkbackup-1.2.0}/PyHardLinkBackup/tests/test_readme.py +0 -0
- {pyhardlinkbackup-1.0.1 → pyhardlinkbackup-1.2.0}/PyHardLinkBackup/utilities/__init__.py +0 -0
- {pyhardlinkbackup-1.0.1 → pyhardlinkbackup-1.2.0}/PyHardLinkBackup/utilities/file_hash_database.py +0 -0
- {pyhardlinkbackup-1.0.1 → pyhardlinkbackup-1.2.0}/PyHardLinkBackup/utilities/file_size_database.py +0 -0
- {pyhardlinkbackup-1.0.1 → pyhardlinkbackup-1.2.0}/PyHardLinkBackup/utilities/humanize.py +0 -0
- {pyhardlinkbackup-1.0.1 → pyhardlinkbackup-1.2.0}/PyHardLinkBackup/utilities/tests/__init__.py +0 -0
- {pyhardlinkbackup-1.0.1 → pyhardlinkbackup-1.2.0}/cli.py +0 -0
- {pyhardlinkbackup-1.0.1 → pyhardlinkbackup-1.2.0}/dev-cli.py +0 -0
- {pyhardlinkbackup-1.0.1 → pyhardlinkbackup-1.2.0}/dist/.gitignore +0 -0
- {pyhardlinkbackup-1.0.1 → pyhardlinkbackup-1.2.0}/docs/README.md +0 -0
- {pyhardlinkbackup-1.0.1 → pyhardlinkbackup-1.2.0}/docs/about-docs.md +0 -0
- {pyhardlinkbackup-1.0.1 → pyhardlinkbackup-1.2.0}/noxfile.py +0 -0
|
@@ -1,3 +1,18 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: PyHardLinkBackup
|
|
3
|
+
Version: 1.2.0
|
|
4
|
+
Summary: HardLink/Deduplication Backups with Python
|
|
5
|
+
Project-URL: Documentation, https://github.com/jedie/PyHardLinkBackup
|
|
6
|
+
Project-URL: Source, https://github.com/jedie/PyHardLinkBackup
|
|
7
|
+
Author-email: Jens Diemer <PyHardLinkBackup@jensdiemer.de>
|
|
8
|
+
License: GPL-3.0-or-later
|
|
9
|
+
Requires-Python: >=3.12
|
|
10
|
+
Requires-Dist: bx-py-utils
|
|
11
|
+
Requires-Dist: cli-base-utilities>=0.27.0
|
|
12
|
+
Requires-Dist: rich
|
|
13
|
+
Requires-Dist: tyro
|
|
14
|
+
Description-Content-Type: text/markdown
|
|
15
|
+
|
|
1
16
|
# PyHardLinkBackup
|
|
2
17
|
|
|
3
18
|
[](https://github.com/jedie/PyHardLinkBackup/actions/workflows/tests.yml)
|
|
@@ -10,6 +25,8 @@ HardLink/Deduplication Backups with Python
|
|
|
10
25
|
|
|
11
26
|
**WIP:** v1.0.0 is a complete rewrite of PyHardLinkBackup.
|
|
12
27
|
|
|
28
|
+
It's similar to `rsync --link-dest` but the deduplication is done globally for all backups and all paths.
|
|
29
|
+
|
|
13
30
|
## installation
|
|
14
31
|
|
|
15
32
|
You can use [pipx](https://pipx.pypa.io/stable/installation/) to install and use PyHardLinkBackup, e.g.:
|
|
@@ -185,10 +202,27 @@ usage: ./dev-cli.py [-h] {benchmark-hashes,coverage,install,lint,mypy,nox,pip-au
|
|
|
185
202
|
|
|
186
203
|
v1 is a complete rewrite of PyHardLinkBackup.
|
|
187
204
|
|
|
205
|
+
Overview of main changes:
|
|
206
|
+
|
|
207
|
+
* Remove Django dependency:
|
|
208
|
+
* No SQlite database anymore -> Data for deduplication stored in filesystem only
|
|
209
|
+
* No Django Admin, because we have no database anymore ;)
|
|
210
|
+
* Change hash algorithm from SHA512 to SHA256, because it's faster and still secure enough
|
|
211
|
+
* Don't store `*.sha512` for every file anymore -> We store one `SHA256SUMS` file in every backup directory
|
|
212
|
+
|
|
188
213
|
## History
|
|
189
214
|
|
|
190
215
|
[comment]: <> (✂✂✂ auto generated history start ✂✂✂)
|
|
191
216
|
|
|
217
|
+
* [v1.2.0](https://github.com/jedie/PyHardLinkBackup/compare/v1.1.0...v1.2.0)
|
|
218
|
+
* 2026-01-15 - Add error handling: Log exception but continue with the backup
|
|
219
|
+
* 2026-01-15 - Check permission and hardlink support on destination path
|
|
220
|
+
* 2026-01-14 - Enhance progress bars
|
|
221
|
+
* 2026-01-14 - Add a note to rsync --link-dest
|
|
222
|
+
* 2026-01-14 - Use cli_base.cli_tools.test_utils.base_testcases
|
|
223
|
+
* [v1.1.0](https://github.com/jedie/PyHardLinkBackup/compare/v1.0.1...v1.1.0)
|
|
224
|
+
* 2026-01-14 - Change backup timestamp directory to old schema: '%Y-%m-%d-%H%M%S'
|
|
225
|
+
* 2026-01-14 - Add "Overview of main changes" to README
|
|
192
226
|
* [v1.0.1](https://github.com/jedie/PyHardLinkBackup/compare/v1.0.0...v1.0.1)
|
|
193
227
|
* 2026-01-13 - Store SHA256SUMS files in backup directories
|
|
194
228
|
* [v1.0.0](https://github.com/jedie/PyHardLinkBackup/compare/v0.13.0...v1.0.0)
|
|
@@ -201,6 +235,9 @@ v1 is a complete rewrite of PyHardLinkBackup.
|
|
|
201
235
|
* 2026-01-13 - Add DocWrite, handle broken symlinks, keep file meta, handle missing hardlink sources
|
|
202
236
|
* 2026-01-12 - First working iteration with rich progress bar
|
|
203
237
|
* 2026-01-08 - Rewrite everything
|
|
238
|
+
|
|
239
|
+
<details><summary>Expand older history entries ...</summary>
|
|
240
|
+
|
|
204
241
|
* [v0.13.0](https://github.com/jedie/PyHardLinkBackup/compare/v0.12.3...v0.13.0)
|
|
205
242
|
* 2020-03-18 - release v0.13.0
|
|
206
243
|
* 2020-03-17 - deactivate pypy tests in travis, because of SQLite errors, like:
|
|
@@ -228,9 +265,6 @@ v1 is a complete rewrite of PyHardLinkBackup.
|
|
|
228
265
|
* 2020-03-16 - just warn if used directly (needfull for devlopment to call this directly ;)
|
|
229
266
|
* 2020-03-16 - update requirements
|
|
230
267
|
* 2020-03-16 - +pytest-randomly
|
|
231
|
-
|
|
232
|
-
<details><summary>Expand older history entries ...</summary>
|
|
233
|
-
|
|
234
268
|
* [v0.12.2](https://github.com/jedie/PyHardLinkBackup/compare/v0.12.1...v0.12.2)
|
|
235
269
|
* 2020-03-06 - prepare v0.12.2 release
|
|
236
270
|
* 2020-03-06 - enhance log file content
|
|
@@ -0,0 +1,252 @@
|
|
|
1
|
+
import dataclasses
|
|
2
|
+
import logging
|
|
3
|
+
import os
|
|
4
|
+
import shutil
|
|
5
|
+
import sys
|
|
6
|
+
import time
|
|
7
|
+
from datetime import datetime
|
|
8
|
+
from pathlib import Path
|
|
9
|
+
|
|
10
|
+
from rich import print # noqa
|
|
11
|
+
|
|
12
|
+
from PyHardLinkBackup.constants import CHUNK_SIZE
|
|
13
|
+
from PyHardLinkBackup.utilities.file_hash_database import FileHashDatabase
|
|
14
|
+
from PyHardLinkBackup.utilities.file_size_database import FileSizeDatabase
|
|
15
|
+
from PyHardLinkBackup.utilities.filesystem import (
|
|
16
|
+
copy_and_hash,
|
|
17
|
+
hash_file,
|
|
18
|
+
humanized_fs_scan,
|
|
19
|
+
iter_scandir_files,
|
|
20
|
+
read_and_hash_file,
|
|
21
|
+
supports_hardlinks,
|
|
22
|
+
)
|
|
23
|
+
from PyHardLinkBackup.utilities.humanize import human_filesize
|
|
24
|
+
from PyHardLinkBackup.utilities.rich_utils import BackupProgress
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
logger = logging.getLogger(__name__)
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
@dataclasses.dataclass
class BackupResult:
    """Collected statistics of one backup run: per-category file counts and byte sizes."""

    # Destination directory of this backup run
    backup_dir: Path

    # All processed source entries (regardless of how they were handled)
    backup_count: int = 0
    backup_size: int = 0

    # Symlinks recreated in the backup (link target copied verbatim, not followed)
    symlink_files: int = 0
    # Files deduplicated via hardlink to an already stored identical file
    hardlinked_files: int = 0
    hardlinked_size: int = 0

    # Files physically copied into the backup
    copied_files: int = 0
    copied_size: int = 0

    # Subset of copied files that were below the deduplication size threshold
    copied_small_files: int = 0
    copied_small_size: int = 0

    # Files skipped because an unexpected error occurred while backing them up
    error_count: int = 0
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
def backup_one_file(
    *,
    src_root: Path,
    entry: os.DirEntry,
    size_db: FileSizeDatabase,
    hash_db: FileHashDatabase,
    backup_dir: Path,
    backup_result: BackupResult,
) -> None:
    """
    Backup a single scandir entry into `backup_dir`, deduplicating via hardlinks.

    Strategy:
      * broken/normal symlinks are recreated (never followed)
      * source `SHA256SUMS` files are skipped (we write our own)
      * small files are always copied (no dedup bookkeeping pays off)
      * otherwise: if a file of the same size was seen before, hash it and
        hardlink against an existing identical file if possible
    All statistics are accumulated on `backup_result`.
    """
    backup_result.backup_count += 1
    src_path = Path(entry.path)

    dst_path = backup_dir / src_path.relative_to(src_root)
    dst_dir_path = dst_path.parent
    if not dst_dir_path.exists():
        dst_dir_path.mkdir(parents=True, exist_ok=False)

    try:
        size = entry.stat().st_size
    except FileNotFoundError:
        # e.g.: Handle broken symlink -> recreate the dangling link as-is.
        target = os.readlink(src_path)
        dst_path.symlink_to(target)
        backup_result.symlink_files += 1
        return

    backup_result.backup_size += size

    if entry.name == 'SHA256SUMS':
        # Skip existing SHA256SUMS files in source tree,
        # because we create our own SHA256SUMS files.
        logger.debug('Skip existing SHA256SUMS file: %s', src_path)
        return

    if entry.is_symlink():
        logger.debug('Copy symlink: %s to %s', src_path, dst_path)
        target = os.readlink(src_path)
        dst_path.symlink_to(target)
        backup_result.symlink_files += 1
        return

    # Process regular files
    assert entry.is_file(follow_symlinks=False), f'Unexpected non-file: {src_path}'

    # Deduplication logic

    if size < size_db.MIN_SIZE:
        # Small file -> always copy without deduplication
        logger.info('Copy small file: %s to %s', src_path, dst_path)
        file_hash = copy_and_hash(src_path, dst_path)
        backup_result.copied_files += 1
        backup_result.copied_size += size
        backup_result.copied_small_files += 1
        backup_result.copied_small_size += size
        store_hash(dst_path, file_hash)
        return

    if size in size_db:
        logger.debug('File with size %iBytes found before -> hash: %s', size, src_path)

        if size <= CHUNK_SIZE:
            # File can be read complete into memory
            logger.debug('File size %iBytes <= CHUNK_SIZE (%iBytes) -> read complete into memory', size, CHUNK_SIZE)
            file_content, file_hash = read_and_hash_file(src_path)
            if existing_path := hash_db.get(file_hash):
                logger.info('Hardlink duplicate file: %s to %s', dst_path, existing_path)
                os.link(existing_path, dst_path)
                backup_result.hardlinked_files += 1
                backup_result.hardlinked_size += size
            else:
                logger.info('Store unique file: %s to %s', src_path, dst_path)
                # NOTE(review): write_bytes() does not preserve file metadata
                # (unlike the shutil.copy2() used for large files) -- confirm intent.
                dst_path.write_bytes(file_content)
                hash_db[file_hash] = dst_path
                backup_result.copied_files += 1
                backup_result.copied_size += size
        else:
            # Large file
            file_hash = hash_file(src_path)  # Calculate hash without copying

            if existing_path := hash_db.get(file_hash):
                logger.info('Hardlink duplicate file: %s to %s', dst_path, existing_path)
                os.link(existing_path, dst_path)
                backup_result.hardlinked_files += 1
                backup_result.hardlinked_size += size
            else:
                logger.info('Copy unique file: %s to %s', src_path, dst_path)
                hash_db[file_hash] = dst_path
                backup_result.copied_files += 1
                backup_result.copied_size += size

                # Keep original file metadata (permission bits, time stamps, and flags)
                # copy2() performs the actual copy for unique large files.
                shutil.copy2(src_path, dst_path)
    else:
        # A file with this size not backuped before -> Can't be duplicate -> copy and hash
        file_hash = copy_and_hash(src_path, dst_path)
        size_db.add(size)
        hash_db[file_hash] = dst_path
        backup_result.copied_files += 1
        backup_result.copied_size += size

    store_hash(dst_path, file_hash)
|
|
152
|
+
|
|
153
|
+
|
|
154
|
+
def store_hash(file_path: Path, file_hash: str) -> None:
    """DocWrite: README.md ## SHA256SUMS
    A `SHA256SUMS` file is stored in each backup directory containing the SHA256 hashes of all files in that directory.
    It's the same format as e.g.: `sha256sum * > SHA256SUMS` command produces.
    So it's possible to verify the integrity of the backup files later.
    e.g.:
    ```bash
    cd .../your/backup/foobar/20240101_120000/
    sha256sum -c SHA256SUMS
    ```
    """
    hash_file_path = file_path.parent / 'SHA256SUMS'
    # Append one entry per backed-up file. Note the TWO spaces between hash and
    # file name: GNU coreutils `sha256sum -c` requires a two-character separator
    # ("  " for text mode, " *" for binary) and rejects single-space lines with
    # "no properly formatted checksum lines found".
    with hash_file_path.open('a') as f:
        f.write(f'{file_hash}  {file_path.name}\n')
|
|
168
|
+
|
|
169
|
+
|
|
170
|
+
def backup_tree(*, src_root: Path, backup_root: Path, excludes: set[str]) -> BackupResult:
    """
    Backup everything below `src_root` into a new timestamped directory under
    `backup_root`, deduplicating files via hardlinks across ALL existing backups.
    Prints a summary and returns the collected `BackupResult` statistics.
    Exits the process (exit code 1) if the source/destination preconditions fail.
    """

    def _abort(*messages: str) -> None:
        # Print the error messages and terminate with a non-zero exit code.
        for message in messages:
            print(message)
        sys.exit(1)

    src_root = src_root.resolve()
    if not src_root.is_dir():
        _abort(
            'Error: Source directory does not exist!',
            f'Please check source directory: "{src_root}"\n',
        )

    backup_root = backup_root.resolve()
    if not backup_root.is_dir():
        _abort(
            'Error: Backup directory does not exist!',
            f'Please create "{backup_root}" directory first and start again!\n',
        )

    if not os.access(backup_root, os.W_OK):
        _abort(
            'Error: No write access to backup directory!',
            f'Please check permissions for backup directory: "{backup_root}"\n',
        )

    if not supports_hardlinks(backup_root):
        _abort(
            'Error: Filesystem for backup directory does not support hardlinks!',
            f'Please check backup directory: "{backup_root}"\n',
        )

    # Step 1: Scan source directory:
    src_file_count, src_total_size = humanized_fs_scan(src_root, excludes)

    phlb_conf_dir = backup_root / '.phlb'
    phlb_conf_dir.mkdir(parents=False, exist_ok=True)

    backup_dir = backup_root / src_root.name / datetime.now().strftime('%Y-%m-%d-%H%M%S')
    logger.info('Backup %s to %s', src_root, backup_dir)
    backup_dir.mkdir(parents=True, exist_ok=False)

    print(f'\nBackup to {backup_dir}...\n')

    with BackupProgress(src_file_count, src_total_size) as progress:
        # "Databases" for deduplication
        size_db = FileSizeDatabase(phlb_conf_dir)
        hash_db = FileHashDatabase(backup_root, phlb_conf_dir)

        backup_result = BackupResult(backup_dir=backup_dir)

        next_update = 0
        for entry in iter_scandir_files(src_root, excludes=excludes):
            try:
                backup_one_file(
                    src_root=src_root,
                    entry=entry,
                    size_db=size_db,
                    hash_db=hash_db,
                    backup_dir=backup_dir,
                    backup_result=backup_result,
                )
            except Exception as err:
                # Log the full traceback, count the error and continue the backup.
                logger.exception(f'Backup {entry.path} {err.__class__.__name__}: {err}')
                backup_result.error_count += 1
            else:
                # Throttle progress redraws (at most every 0.5s).
                now = time.monotonic()
                if now >= next_update:
                    progress.update(backup_count=backup_result.backup_count, backup_size=backup_result.backup_size)
                    next_update = now + 0.5

        # Finalize progress indicator values:
        progress.update(backup_count=backup_result.backup_count, backup_size=backup_result.backup_size)

    print(f'\nBackup complete: {backup_dir} (total size {human_filesize(backup_result.backup_size)})\n')
    print(f' Total files processed: {backup_result.backup_count}')
    print(f' * Symlinked files: {backup_result.symlink_files}')
    print(
        f' * Hardlinked files: {backup_result.hardlinked_files}'
        f' (saved {human_filesize(backup_result.hardlinked_size)})'
    )
    print(f' * Copied files: {backup_result.copied_files} (total {human_filesize(backup_result.copied_size)})')
    print(
        f' of which small (<{size_db.MIN_SIZE} Bytes)'
        f' files: {backup_result.copied_small_files}'
        f' (total {human_filesize(backup_result.copied_small_size)})'
    )
    if backup_result.error_count > 0:
        print(f' Errors during backup: {backup_result.error_count} (see log for details)')
    print()

    return backup_result
|