PyHardLinkBackup 1.6.0__tar.gz → 1.7.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {pyhardlinkbackup-1.6.0 → pyhardlinkbackup-1.7.1}/.pre-commit-config.yaml +1 -1
- pyhardlinkbackup-1.6.0/README.md → pyhardlinkbackup-1.7.1/PKG-INFO +95 -18
- {pyhardlinkbackup-1.6.0 → pyhardlinkbackup-1.7.1}/PyHardLinkBackup/__init__.py +1 -1
- {pyhardlinkbackup-1.6.0 → pyhardlinkbackup-1.7.1}/PyHardLinkBackup/backup.py +66 -58
- {pyhardlinkbackup-1.6.0 → pyhardlinkbackup-1.7.1}/PyHardLinkBackup/compare_backup.py +11 -5
- {pyhardlinkbackup-1.6.0 → pyhardlinkbackup-1.7.1}/PyHardLinkBackup/constants.py +4 -2
- {pyhardlinkbackup-1.6.0 → pyhardlinkbackup-1.7.1}/PyHardLinkBackup/rebuild_databases.py +10 -4
- pyhardlinkbackup-1.7.1/PyHardLinkBackup/tests/test_backup.py +935 -0
- pyhardlinkbackup-1.7.1/PyHardLinkBackup/tests/test_compare_backup.py +165 -0
- {pyhardlinkbackup-1.6.0 → pyhardlinkbackup-1.7.1}/PyHardLinkBackup/tests/test_rebuild_database.py +3 -5
- {pyhardlinkbackup-1.6.0 → pyhardlinkbackup-1.7.1}/PyHardLinkBackup/utilities/filesystem.py +42 -11
- pyhardlinkbackup-1.7.1/PyHardLinkBackup/utilities/rich_utils.py +248 -0
- {pyhardlinkbackup-1.6.0 → pyhardlinkbackup-1.7.1}/PyHardLinkBackup/utilities/tests/test_filesystem.py +11 -13
- pyhardlinkbackup-1.7.1/PyHardLinkBackup/utilities/tests/unittest_utilities.py +78 -0
- pyhardlinkbackup-1.6.0/PKG-INFO → pyhardlinkbackup-1.7.1/README.md +80 -33
- {pyhardlinkbackup-1.6.0 → pyhardlinkbackup-1.7.1}/docs/README.md +2 -2
- {pyhardlinkbackup-1.6.0 → pyhardlinkbackup-1.7.1}/pyproject.toml +1 -1
- {pyhardlinkbackup-1.6.0 → pyhardlinkbackup-1.7.1}/uv.lock +48 -48
- pyhardlinkbackup-1.6.0/PyHardLinkBackup/tests/test_backup.py +0 -706
- pyhardlinkbackup-1.6.0/PyHardLinkBackup/tests/test_compare_backup.py +0 -145
- pyhardlinkbackup-1.6.0/PyHardLinkBackup/utilities/rich_utils.py +0 -99
- {pyhardlinkbackup-1.6.0 → pyhardlinkbackup-1.7.1}/.editorconfig +0 -0
- {pyhardlinkbackup-1.6.0 → pyhardlinkbackup-1.7.1}/.github/workflows/tests.yml +0 -0
- {pyhardlinkbackup-1.6.0 → pyhardlinkbackup-1.7.1}/.gitignore +0 -0
- {pyhardlinkbackup-1.6.0 → pyhardlinkbackup-1.7.1}/.idea/.gitignore +0 -0
- {pyhardlinkbackup-1.6.0 → pyhardlinkbackup-1.7.1}/.pre-commit-hooks.yaml +0 -0
- {pyhardlinkbackup-1.6.0 → pyhardlinkbackup-1.7.1}/.run/Template Python tests.run.xml +0 -0
- {pyhardlinkbackup-1.6.0 → pyhardlinkbackup-1.7.1}/.run/Unittests - __all__.run.xml +0 -0
- {pyhardlinkbackup-1.6.0 → pyhardlinkbackup-1.7.1}/.run/cli.py --help.run.xml +0 -0
- {pyhardlinkbackup-1.6.0 → pyhardlinkbackup-1.7.1}/.run/dev-cli update.run.xml +0 -0
- {pyhardlinkbackup-1.6.0 → pyhardlinkbackup-1.7.1}/.run/only DocTests.run.xml +0 -0
- {pyhardlinkbackup-1.6.0 → pyhardlinkbackup-1.7.1}/.run/only DocWrite.run.xml +0 -0
- {pyhardlinkbackup-1.6.0 → pyhardlinkbackup-1.7.1}/.venv-app/lib/python3.12/site-packages/cli_base/tests/shell_complete_snapshots/.gitignore +0 -0
- {pyhardlinkbackup-1.6.0 → pyhardlinkbackup-1.7.1}/PyHardLinkBackup/__main__.py +0 -0
- {pyhardlinkbackup-1.6.0 → pyhardlinkbackup-1.7.1}/PyHardLinkBackup/cli_app/__init__.py +0 -0
- {pyhardlinkbackup-1.6.0 → pyhardlinkbackup-1.7.1}/PyHardLinkBackup/cli_app/phlb.py +0 -0
- {pyhardlinkbackup-1.6.0 → pyhardlinkbackup-1.7.1}/PyHardLinkBackup/cli_dev/__init__.py +0 -0
- {pyhardlinkbackup-1.6.0 → pyhardlinkbackup-1.7.1}/PyHardLinkBackup/cli_dev/benchmark.py +0 -0
- {pyhardlinkbackup-1.6.0 → pyhardlinkbackup-1.7.1}/PyHardLinkBackup/cli_dev/code_style.py +0 -0
- {pyhardlinkbackup-1.6.0 → pyhardlinkbackup-1.7.1}/PyHardLinkBackup/cli_dev/packaging.py +0 -0
- {pyhardlinkbackup-1.6.0 → pyhardlinkbackup-1.7.1}/PyHardLinkBackup/cli_dev/shell_completion.py +0 -0
- {pyhardlinkbackup-1.6.0 → pyhardlinkbackup-1.7.1}/PyHardLinkBackup/cli_dev/testing.py +0 -0
- {pyhardlinkbackup-1.6.0 → pyhardlinkbackup-1.7.1}/PyHardLinkBackup/cli_dev/update_readme_history.py +0 -0
- {pyhardlinkbackup-1.6.0 → pyhardlinkbackup-1.7.1}/PyHardLinkBackup/logging_setup.py +0 -0
- {pyhardlinkbackup-1.6.0 → pyhardlinkbackup-1.7.1}/PyHardLinkBackup/tests/__init__.py +0 -0
- {pyhardlinkbackup-1.6.0 → pyhardlinkbackup-1.7.1}/PyHardLinkBackup/tests/test_doc_write.py +0 -0
- {pyhardlinkbackup-1.6.0 → pyhardlinkbackup-1.7.1}/PyHardLinkBackup/tests/test_doctests.py +0 -0
- {pyhardlinkbackup-1.6.0 → pyhardlinkbackup-1.7.1}/PyHardLinkBackup/tests/test_project_setup.py +0 -0
- {pyhardlinkbackup-1.6.0 → pyhardlinkbackup-1.7.1}/PyHardLinkBackup/tests/test_readme.py +0 -0
- {pyhardlinkbackup-1.6.0 → pyhardlinkbackup-1.7.1}/PyHardLinkBackup/tests/test_readme_history.py +0 -0
- {pyhardlinkbackup-1.6.0 → pyhardlinkbackup-1.7.1}/PyHardLinkBackup/utilities/__init__.py +0 -0
- {pyhardlinkbackup-1.6.0 → pyhardlinkbackup-1.7.1}/PyHardLinkBackup/utilities/file_hash_database.py +0 -0
- {pyhardlinkbackup-1.6.0 → pyhardlinkbackup-1.7.1}/PyHardLinkBackup/utilities/file_size_database.py +0 -0
- {pyhardlinkbackup-1.6.0 → pyhardlinkbackup-1.7.1}/PyHardLinkBackup/utilities/humanize.py +0 -0
- {pyhardlinkbackup-1.6.0 → pyhardlinkbackup-1.7.1}/PyHardLinkBackup/utilities/sha256sums.py +0 -0
- {pyhardlinkbackup-1.6.0 → pyhardlinkbackup-1.7.1}/PyHardLinkBackup/utilities/tee.py +0 -0
- {pyhardlinkbackup-1.6.0 → pyhardlinkbackup-1.7.1}/PyHardLinkBackup/utilities/tests/__init__.py +0 -0
- {pyhardlinkbackup-1.6.0 → pyhardlinkbackup-1.7.1}/PyHardLinkBackup/utilities/tests/test_file_hash_database.py +0 -0
- {pyhardlinkbackup-1.6.0 → pyhardlinkbackup-1.7.1}/PyHardLinkBackup/utilities/tests/test_file_size_database.py +0 -0
- {pyhardlinkbackup-1.6.0 → pyhardlinkbackup-1.7.1}/PyHardLinkBackup/utilities/tyro_cli_shared_args.py +0 -0
- {pyhardlinkbackup-1.6.0 → pyhardlinkbackup-1.7.1}/cli.py +0 -0
- {pyhardlinkbackup-1.6.0 → pyhardlinkbackup-1.7.1}/dev-cli.py +0 -0
- {pyhardlinkbackup-1.6.0 → pyhardlinkbackup-1.7.1}/dist/.gitignore +0 -0
- {pyhardlinkbackup-1.6.0 → pyhardlinkbackup-1.7.1}/docs/about-docs.md +0 -0
- {pyhardlinkbackup-1.6.0 → pyhardlinkbackup-1.7.1}/noxfile.py +0 -0
|
@@ -1,3 +1,18 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: PyHardLinkBackup
|
|
3
|
+
Version: 1.7.1
|
|
4
|
+
Summary: HardLink/Deduplication Backups with Python
|
|
5
|
+
Project-URL: Documentation, https://github.com/jedie/PyHardLinkBackup
|
|
6
|
+
Project-URL: Source, https://github.com/jedie/PyHardLinkBackup
|
|
7
|
+
Author-email: Jens Diemer <PyHardLinkBackup@jensdiemer.de>
|
|
8
|
+
License: GPL-3.0-or-later
|
|
9
|
+
Requires-Python: >=3.12
|
|
10
|
+
Requires-Dist: bx-py-utils
|
|
11
|
+
Requires-Dist: cli-base-utilities>=0.27.1
|
|
12
|
+
Requires-Dist: rich
|
|
13
|
+
Requires-Dist: tyro
|
|
14
|
+
Description-Content-Type: text/markdown
|
|
15
|
+
|
|
1
16
|
# PyHardLinkBackup
|
|
2
17
|
|
|
3
18
|
[tests](https://github.com/jedie/PyHardLinkBackup/actions/workflows/tests.yml)
|
|
@@ -6,11 +21,20 @@
|
|
|
6
21
|
[pyproject.toml](https://github.com/jedie/PyHardLinkBackup/blob/main/pyproject.toml)
|
|
7
22
|
[LICENSE](https://github.com/jedie/PyHardLinkBackup/blob/main/LICENSE)
|
|
8
23
|
|
|
9
|
-
|
|
24
|
+
PyHardLinkBackup is a cross-platform backup tool designed for efficient, reliable, and accessible backups.
|
|
25
|
+
Similar to `rsync --link-dest`, but with global deduplication across all backups and all paths, not just between two directories.
|
|
26
|
+
|
|
27
|
+
Some aspects:
|
|
28
|
+
|
|
29
|
+
- Creates deduplicated, versioned backups using hardlinks, minimizing storage usage by linking identical files across all backup snapshots.
|
|
30
|
+
- Employs a global deduplication database (by file size and SHA256 hash) per backup root, ensuring that duplicate files are detected and hardlinked even if they are moved or renamed between backups.
|
|
31
|
+
- Backups are stored as regular files and directories—no proprietary formats—so you can access your data directly without special tools.
|
|
32
|
+
- Deleting old snapshots does not affect the integrity of remaining backups.
|
|
33
|
+
- Linux and macOS are fully supported (Windows support is experimental).
|
|
10
34
|
|
|
11
|
-
|
|
35
|
+
Limitations:
|
|
12
36
|
|
|
13
|
-
|
|
37
|
+
- Requires a filesystem that supports hardlinks (e.g., btrfs, zfs, ext4, APFS, NTFS with limitations).
|
|
14
38
|
|
|
15
39
|
## installation
|
|
16
40
|
|
|
@@ -23,7 +47,16 @@ pipx install PyHardLinkBackup
|
|
|
23
47
|
```
|
|
24
48
|
|
|
25
49
|
After this you can call the CLI via `phlb` command.
|
|
26
|
-
The main command is `phlb backup <source> <destination>`
|
|
50
|
+
The main command is `phlb backup <source> <destination>` to create a backup.
|
|
51
|
+
|
|
52
|
+
e.g.:
|
|
53
|
+
|
|
54
|
+
```bash
|
|
55
|
+
phlb backup /path/to/source /path/to/destination
|
|
56
|
+
```
|
|
57
|
+
|
|
58
|
+
This will create a snapshot in `/path/to/destination` using hard links for deduplication. You can safely delete old snapshots without affecting others.
|
|
59
|
+
|
|
27
60
|
|
|
28
61
|
[comment]: <> (✂✂✂ auto generated backup help start ✂✂✂)
|
|
29
62
|
```
|
|
@@ -48,20 +81,59 @@ Backup the source directory to the destination directory using hard links for de
|
|
|
48
81
|
[comment]: <> (✂✂✂ auto generated backup help end ✂✂✂)
|
|
49
82
|
|
|
50
83
|
|
|
84
|
+
## Screenshots
|
|
85
|
+
### Screenshot - running a backup
|
|
86
|
+
|
|
87
|
+
----
|
|
88
|
+
|
|
89
|
+

|
|
90
|
+
|
|
91
|
+
----
|
|
92
|
+
|
|
93
|
+
### Screenshot - backup finished
|
|
94
|
+
|
|
95
|
+
----
|
|
96
|
+
|
|
97
|
+

|
|
98
|
+
|
|
99
|
+
----
|
|
100
|
+
|
|
101
|
+
(more screenshots here: [jedie.github.io/tree/main/screenshots/PyHardLinkBackup](https://github.com/jedie/jedie.github.io/tree/main/screenshots/PyHardLinkBackup))
|
|
102
|
+
|
|
103
|
+
|
|
104
|
+
### update
|
|
105
|
+
|
|
106
|
+
If you use pipx, just call:
|
|
107
|
+
```bash
|
|
108
|
+
pipx upgrade PyHardLinkBackup
|
|
109
|
+
```
|
|
110
|
+
see: https://pipx.pypa.io/stable/docs/#pipx-upgrade
|
|
51
111
|
|
|
52
|
-
Running a backup looks like:
|
|
53
112
|
|
|
54
|
-
|
|
113
|
+
### Troubleshooting
|
|
55
114
|
|
|
115
|
+
- **Permission Errors:** Ensure you have read access to source and write access to destination.
|
|
116
|
+
- **Hardlink Limits:** Some filesystems (e.g., NTFS) have limits on the number of hardlinks per file.
|
|
117
|
+
- **Symlink Handling:** Broken symlinks are handled gracefully; see logs for details.
|
|
118
|
+
- **Backup Deletion:** Deleting a snapshot does not affect deduplication of other backups.
|
|
119
|
+
- **Log Files:** Check the log file in each backup directory for error details.
|
|
56
120
|
|
|
57
121
|
|
|
58
|
-
|
|
122
|
+
To lower the priority of the backup process (useful to reduce system impact during heavy backups), you can use `nice` and `ionice` on Linux systems:
|
|
59
123
|
|
|
60
|
-
|
|
124
|
+
```bash
|
|
125
|
+
nice -n 19 ionice -c3 phlb backup /path/to/source /path/to/destination
|
|
126
|
+
```
|
|
127
|
+
- `nice -n 19` sets the lowest CPU priority.
|
|
128
|
+
- `ionice -c3` sets the lowest I/O priority (idle class).
|
|
61
129
|
|
|
130
|
+
Adjust priority of an already running backup:
|
|
131
|
+
```bash
|
|
132
|
+
renice 19 -p $(pgrep phlb) && ionice -c3 -p $(pgrep phlb)
|
|
133
|
+
```
|
|
62
134
|
|
|
63
135
|
|
|
64
|
-
complete help for main CLI app
|
|
136
|
+
### complete help for main CLI app
|
|
65
137
|
|
|
66
138
|
[comment]: <> (✂✂✂ auto generated main help start ✂✂✂)
|
|
67
139
|
```
|
|
@@ -84,13 +156,7 @@ usage: phlb [-h] {backup,compare,rebuild,version}
|
|
|
84
156
|
[comment]: <> (✂✂✂ auto generated main help end ✂✂✂)
|
|
85
157
|
|
|
86
158
|
|
|
87
|
-
### update
|
|
88
159
|
|
|
89
|
-
If you use pipx, just call:
|
|
90
|
-
```bash
|
|
91
|
-
pipx upgrade PyHardLinkBackup
|
|
92
|
-
```
|
|
93
|
-
see: https://pipx.pypa.io/stable/docs/#pipx-upgrade
|
|
94
160
|
|
|
95
161
|
|
|
96
162
|
## concept
|
|
@@ -217,21 +283,32 @@ Overview of main changes:
|
|
|
217
283
|
|
|
218
284
|
[comment]: <> (✂✂✂ auto generated history start ✂✂✂)
|
|
219
285
|
|
|
286
|
+
* [v1.7.1](https://github.com/jedie/PyHardLinkBackup/compare/v1.7.0...v1.7.1)
|
|
287
|
+
* 2026-01-19 - Update requirements to fix problems under Windows
|
|
288
|
+
* [v1.7.0](https://github.com/jedie/PyHardLinkBackup/compare/v1.6.0...v1.7.0)
|
|
289
|
+
* 2026-01-19 - Speedup and enhance unittest
|
|
290
|
+
* 2026-01-17 - Remove unfinished copied files on errors
|
|
291
|
+
* 2026-01-17 - Display/update progress on very large files #75 and enhance all bars
|
|
292
|
+
* 2026-01-18 - Expand tests: Check file open calls
|
|
293
|
+
* 2026-01-17 - expand tests
|
|
294
|
+
* 2026-01-17 - simplify tests
|
|
295
|
+
* 2026-01-17 - Warn if broken symlink found
|
|
296
|
+
* 2026-01-17 - Update README
|
|
220
297
|
* [v1.6.0](https://github.com/jedie/PyHardLinkBackup/compare/v1.5.0...v1.6.0)
|
|
221
298
|
* 2026-01-17 - Fix flaky test, because of terminal size
|
|
222
299
|
* 2026-01-17 - Bugfix: Don't hash new large files twice
|
|
223
300
|
* 2026-01-17 - Use compare also in backup tests
|
|
224
301
|
* [v1.5.0](https://github.com/jedie/PyHardLinkBackup/compare/v1.4.1...v1.5.0)
|
|
225
302
|
* 2026-01-17 - NEW: Compare command to verify source tree with last backup
|
|
303
|
+
|
|
304
|
+
<details><summary>Expand older history entries ...</summary>
|
|
305
|
+
|
|
226
306
|
* [v1.4.1](https://github.com/jedie/PyHardLinkBackup/compare/v1.4.0...v1.4.1)
|
|
227
307
|
* 2026-01-16 - Bugfix large file handling
|
|
228
308
|
* [v1.4.0](https://github.com/jedie/PyHardLinkBackup/compare/v1.3.0...v1.4.0)
|
|
229
309
|
* 2026-01-16 - Create log file in backup and a summary.txt
|
|
230
310
|
* 2026-01-16 - Run CI tests on macOS, too.
|
|
231
311
|
* 2026-01-16 - add dev cli command "scan-benchmark"
|
|
232
|
-
|
|
233
|
-
<details><summary>Expand older history entries ...</summary>
|
|
234
|
-
|
|
235
312
|
* [v1.3.0](https://github.com/jedie/PyHardLinkBackup/compare/v1.2.0...v1.3.0)
|
|
236
313
|
* 2026-01-15 - Verify SHA256SUMS files in "rebuild" command, too.
|
|
237
314
|
* 2026-01-15 - Code cleanup: use more generic names for and in BackupProgress
|
|
@@ -14,6 +14,7 @@ from PyHardLinkBackup.logging_setup import LoggingManager
|
|
|
14
14
|
from PyHardLinkBackup.utilities.file_hash_database import FileHashDatabase
|
|
15
15
|
from PyHardLinkBackup.utilities.file_size_database import FileSizeDatabase
|
|
16
16
|
from PyHardLinkBackup.utilities.filesystem import (
|
|
17
|
+
RemoveFileOnError,
|
|
17
18
|
copy_and_hash,
|
|
18
19
|
hash_file,
|
|
19
20
|
humanized_fs_scan,
|
|
@@ -59,6 +60,7 @@ def backup_one_file(
|
|
|
59
60
|
hash_db: FileHashDatabase,
|
|
60
61
|
backup_dir: Path,
|
|
61
62
|
backup_result: BackupResult,
|
|
63
|
+
progress: DisplayFileTreeProgress,
|
|
62
64
|
) -> None:
|
|
63
65
|
backup_result.backup_count += 1
|
|
64
66
|
src_path = Path(entry.path)
|
|
@@ -70,8 +72,8 @@ def backup_one_file(
|
|
|
70
72
|
|
|
71
73
|
try:
|
|
72
74
|
size = entry.stat().st_size
|
|
73
|
-
except FileNotFoundError:
|
|
74
|
-
|
|
75
|
+
except FileNotFoundError as err:
|
|
76
|
+
logger.warning(f'Broken symlink {src_path}: {err.__class__.__name__}: {err}')
|
|
75
77
|
target = os.readlink(src_path)
|
|
76
78
|
dst_path.symlink_to(target)
|
|
77
79
|
backup_result.symlink_files += 1
|
|
@@ -95,65 +97,66 @@ def backup_one_file(
|
|
|
95
97
|
# Process regular files
|
|
96
98
|
assert entry.is_file(follow_symlinks=False), f'Unexpected non-file: {src_path}'
|
|
97
99
|
|
|
98
|
-
|
|
100
|
+
with RemoveFileOnError(dst_path):
|
|
101
|
+
# Deduplication logic
|
|
102
|
+
|
|
103
|
+
if size < size_db.MIN_SIZE:
|
|
104
|
+
# Small file -> always copy without deduplication
|
|
105
|
+
logger.info('Copy small file: %s to %s', src_path, dst_path)
|
|
106
|
+
file_hash = copy_and_hash(src_path, dst_path, progress=progress, total_size=size)
|
|
107
|
+
backup_result.copied_files += 1
|
|
108
|
+
backup_result.copied_size += size
|
|
109
|
+
backup_result.copied_small_files += 1
|
|
110
|
+
backup_result.copied_small_size += size
|
|
111
|
+
store_hash(dst_path, file_hash)
|
|
112
|
+
return
|
|
113
|
+
|
|
114
|
+
if size in size_db:
|
|
115
|
+
logger.debug('File with size %iBytes found before -> hash: %s', size, src_path)
|
|
116
|
+
|
|
117
|
+
if size <= CHUNK_SIZE:
|
|
118
|
+
# File can be read complete into memory
|
|
119
|
+
logger.debug('File size %iBytes <= CHUNK_SIZE (%iBytes) -> read complete into memory', size, CHUNK_SIZE)
|
|
120
|
+
file_content, file_hash = read_and_hash_file(src_path)
|
|
121
|
+
if existing_path := hash_db.get(file_hash):
|
|
122
|
+
logger.info('Hardlink duplicate file: %s to %s', dst_path, existing_path)
|
|
123
|
+
os.link(existing_path, dst_path)
|
|
124
|
+
backup_result.hardlinked_files += 1
|
|
125
|
+
backup_result.hardlinked_size += size
|
|
126
|
+
else:
|
|
127
|
+
logger.info('Store unique file: %s to %s', src_path, dst_path)
|
|
128
|
+
dst_path.write_bytes(file_content)
|
|
129
|
+
hash_db[file_hash] = dst_path
|
|
130
|
+
backup_result.copied_files += 1
|
|
131
|
+
backup_result.copied_size += size
|
|
99
132
|
|
|
100
|
-
if size < size_db.MIN_SIZE:
|
|
101
|
-
# Small file -> always copy without deduplication
|
|
102
|
-
logger.info('Copy small file: %s to %s', src_path, dst_path)
|
|
103
|
-
file_hash = copy_and_hash(src_path, dst_path)
|
|
104
|
-
backup_result.copied_files += 1
|
|
105
|
-
backup_result.copied_size += size
|
|
106
|
-
backup_result.copied_small_files += 1
|
|
107
|
-
backup_result.copied_small_size += size
|
|
108
|
-
store_hash(dst_path, file_hash)
|
|
109
|
-
return
|
|
110
|
-
|
|
111
|
-
if size in size_db:
|
|
112
|
-
logger.debug('File with size %iBytes found before -> hash: %s', size, src_path)
|
|
113
|
-
|
|
114
|
-
if size <= CHUNK_SIZE:
|
|
115
|
-
# File can be read complete into memory
|
|
116
|
-
logger.debug('File size %iBytes <= CHUNK_SIZE (%iBytes) -> read complete into memory', size, CHUNK_SIZE)
|
|
117
|
-
file_content, file_hash = read_and_hash_file(src_path)
|
|
118
|
-
if existing_path := hash_db.get(file_hash):
|
|
119
|
-
logger.info('Hardlink duplicate file: %s to %s', dst_path, existing_path)
|
|
120
|
-
os.link(existing_path, dst_path)
|
|
121
|
-
backup_result.hardlinked_files += 1
|
|
122
|
-
backup_result.hardlinked_size += size
|
|
123
133
|
else:
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
134
|
+
# Large file
|
|
135
|
+
file_hash = hash_file(src_path, progress=progress, total_size=size) # Calculate hash without copying
|
|
136
|
+
|
|
137
|
+
if existing_path := hash_db.get(file_hash):
|
|
138
|
+
logger.info('Hardlink duplicate file: %s to %s', dst_path, existing_path)
|
|
139
|
+
os.link(existing_path, dst_path)
|
|
140
|
+
backup_result.hardlinked_files += 1
|
|
141
|
+
backup_result.hardlinked_size += size
|
|
142
|
+
else:
|
|
143
|
+
logger.info('Copy unique file: %s to %s', src_path, dst_path)
|
|
144
|
+
shutil.copyfile(src_path, dst_path)
|
|
145
|
+
hash_db[file_hash] = dst_path
|
|
146
|
+
backup_result.copied_files += 1
|
|
147
|
+
backup_result.copied_size += size
|
|
148
|
+
|
|
149
|
+
# Keep original file metadata (permission bits, time stamps, and flags)
|
|
150
|
+
shutil.copystat(src_path, dst_path)
|
|
130
151
|
else:
|
|
131
|
-
#
|
|
132
|
-
file_hash =
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
backup_result.hardlinked_files += 1
|
|
138
|
-
backup_result.hardlinked_size += size
|
|
139
|
-
else:
|
|
140
|
-
logger.info('Copy unique file: %s to %s', src_path, dst_path)
|
|
141
|
-
shutil.copyfile(src_path, dst_path)
|
|
142
|
-
hash_db[file_hash] = dst_path
|
|
143
|
-
backup_result.copied_files += 1
|
|
144
|
-
backup_result.copied_size += size
|
|
145
|
-
|
|
146
|
-
# Keep original file metadata (permission bits, time stamps, and flags)
|
|
147
|
-
shutil.copystat(src_path, dst_path)
|
|
148
|
-
else:
|
|
149
|
-
# A file with this size not backuped before -> Can't be duplicate -> copy and hash
|
|
150
|
-
file_hash = copy_and_hash(src_path, dst_path)
|
|
151
|
-
size_db.add(size)
|
|
152
|
-
hash_db[file_hash] = dst_path
|
|
153
|
-
backup_result.copied_files += 1
|
|
154
|
-
backup_result.copied_size += size
|
|
152
|
+
# A file with this size not backuped before -> Can't be duplicate -> copy and hash
|
|
153
|
+
file_hash = copy_and_hash(src_path, dst_path, progress=progress, total_size=size)
|
|
154
|
+
size_db.add(size)
|
|
155
|
+
hash_db[file_hash] = dst_path
|
|
156
|
+
backup_result.copied_files += 1
|
|
157
|
+
backup_result.copied_size += size
|
|
155
158
|
|
|
156
|
-
|
|
159
|
+
store_hash(dst_path, file_hash)
|
|
157
160
|
|
|
158
161
|
|
|
159
162
|
def backup_tree(
|
|
@@ -205,7 +208,11 @@ def backup_tree(
|
|
|
205
208
|
|
|
206
209
|
print(f'\nBackup to {backup_dir}...\n')
|
|
207
210
|
|
|
208
|
-
with DisplayFileTreeProgress(
|
|
211
|
+
with DisplayFileTreeProgress(
|
|
212
|
+
description=f'Backup {src_root}...',
|
|
213
|
+
total_file_count=src_file_count,
|
|
214
|
+
total_size=src_total_size,
|
|
215
|
+
) as progress:
|
|
209
216
|
# "Databases" for deduplication
|
|
210
217
|
size_db = FileSizeDatabase(phlb_conf_dir)
|
|
211
218
|
hash_db = FileHashDatabase(backup_root, phlb_conf_dir)
|
|
@@ -222,6 +229,7 @@ def backup_tree(
|
|
|
222
229
|
hash_db=hash_db,
|
|
223
230
|
backup_dir=backup_dir,
|
|
224
231
|
backup_result=backup_result,
|
|
232
|
+
progress=progress,
|
|
225
233
|
)
|
|
226
234
|
except Exception as err:
|
|
227
235
|
logger.exception(f'Backup {entry.path} {err.__class__.__name__}: {err}')
|
|
@@ -53,6 +53,7 @@ def compare_one_file(
|
|
|
53
53
|
hash_db: FileHashDatabase,
|
|
54
54
|
compare_dir: Path,
|
|
55
55
|
compare_result: CompareResult,
|
|
56
|
+
progress: DisplayFileTreeProgress,
|
|
56
57
|
) -> None:
|
|
57
58
|
src_size = entry.stat().st_size
|
|
58
59
|
|
|
@@ -80,8 +81,8 @@ def compare_one_file(
|
|
|
80
81
|
compare_result.file_size_missmatch += 1
|
|
81
82
|
return
|
|
82
83
|
|
|
83
|
-
src_hash = hash_file(src_path)
|
|
84
|
-
dst_hash = hash_file(dst_path)
|
|
84
|
+
src_hash = hash_file(src_path, progress=progress, total_size=src_size)
|
|
85
|
+
dst_hash = hash_file(dst_path, progress=progress, total_size=dst_size)
|
|
85
86
|
|
|
86
87
|
if src_hash != dst_hash:
|
|
87
88
|
logger.warning(
|
|
@@ -158,7 +159,11 @@ def compare_tree(
|
|
|
158
159
|
with PrintTimingContextManager('Filesystem scan completed in'):
|
|
159
160
|
src_file_count, src_total_size = humanized_fs_scan(src_root, excludes=excludes)
|
|
160
161
|
|
|
161
|
-
with DisplayFileTreeProgress(
|
|
162
|
+
with DisplayFileTreeProgress(
|
|
163
|
+
description=f'Compare {src_root}...',
|
|
164
|
+
total_file_count=src_file_count,
|
|
165
|
+
total_size=src_total_size,
|
|
166
|
+
) as progress:
|
|
162
167
|
# init "databases":
|
|
163
168
|
size_db = FileSizeDatabase(phlb_conf_dir)
|
|
164
169
|
hash_db = FileHashDatabase(backup_root, phlb_conf_dir)
|
|
@@ -175,6 +180,7 @@ def compare_tree(
|
|
|
175
180
|
hash_db=hash_db,
|
|
176
181
|
compare_dir=compare_dir,
|
|
177
182
|
compare_result=compare_result,
|
|
183
|
+
progress=progress,
|
|
178
184
|
)
|
|
179
185
|
except Exception as err:
|
|
180
186
|
logger.exception(f'Compare {entry.path} {err.__class__.__name__}: {err}')
|
|
@@ -184,12 +190,12 @@ def compare_tree(
|
|
|
184
190
|
if now >= next_update:
|
|
185
191
|
progress.update(
|
|
186
192
|
completed_file_count=compare_result.total_file_count,
|
|
187
|
-
|
|
193
|
+
advance_size=compare_result.total_size,
|
|
188
194
|
)
|
|
189
195
|
next_update = now + 0.5
|
|
190
196
|
|
|
191
197
|
# Finalize progress indicator values:
|
|
192
|
-
progress.update(completed_file_count=compare_result.total_file_count,
|
|
198
|
+
progress.update(completed_file_count=compare_result.total_file_count, advance_size=compare_result.total_size)
|
|
193
199
|
|
|
194
200
|
summary_file = compare_main_dir / f'{now_timestamp}-summary.txt'
|
|
195
201
|
with TeeStdoutContext(summary_file):
|
|
@@ -11,6 +11,8 @@ BASE_PATH = Path(PyHardLinkBackup.__file__).parent
|
|
|
11
11
|
##########################################################################
|
|
12
12
|
# "Settings" for PyHardLinkBackup:
|
|
13
13
|
|
|
14
|
-
CHUNK_SIZE = 64 * 1024 * 1024 # 64 MB
|
|
15
|
-
SMALL_FILE_THRESHOLD = 1000 # bytes
|
|
16
14
|
HASH_ALGO = 'sha256'
|
|
15
|
+
SMALL_FILE_THRESHOLD = 1000 # bytes
|
|
16
|
+
CHUNK_SIZE = 64 * 1024 * 1024 # 64 MB
|
|
17
|
+
LAGE_FILE_PROGRESS_MIN_SIZE = CHUNK_SIZE * 3
|
|
18
|
+
|
|
@@ -41,6 +41,7 @@ def rebuild_one_file(
|
|
|
41
41
|
size_db: FileSizeDatabase,
|
|
42
42
|
hash_db: FileHashDatabase,
|
|
43
43
|
rebuild_result: RebuildResult,
|
|
44
|
+
progress: DisplayFileTreeProgress,
|
|
44
45
|
):
|
|
45
46
|
file_path = Path(entry.path)
|
|
46
47
|
|
|
@@ -62,7 +63,7 @@ def rebuild_one_file(
|
|
|
62
63
|
# Small files will never deduplicate, skip them
|
|
63
64
|
return
|
|
64
65
|
|
|
65
|
-
file_hash = hash_file(file_path)
|
|
66
|
+
file_hash = hash_file(file_path, progress=progress, total_size=size)
|
|
66
67
|
|
|
67
68
|
if size not in size_db:
|
|
68
69
|
size_db.add(size)
|
|
@@ -121,7 +122,11 @@ def rebuild(
|
|
|
121
122
|
file_count -= 1
|
|
122
123
|
total_size -= file.stat().st_size
|
|
123
124
|
|
|
124
|
-
with DisplayFileTreeProgress(
|
|
125
|
+
with DisplayFileTreeProgress(
|
|
126
|
+
description=f'Rebuild {backup_root}...',
|
|
127
|
+
total_file_count=file_count,
|
|
128
|
+
total_size=total_size,
|
|
129
|
+
) as progress:
|
|
125
130
|
# "Databases" for deduplication
|
|
126
131
|
size_db = FileSizeDatabase(phlb_conf_dir)
|
|
127
132
|
hash_db = FileHashDatabase(backup_root, phlb_conf_dir)
|
|
@@ -137,6 +142,7 @@ def rebuild(
|
|
|
137
142
|
size_db=size_db,
|
|
138
143
|
hash_db=hash_db,
|
|
139
144
|
rebuild_result=rebuild_result,
|
|
145
|
+
progress=progress,
|
|
140
146
|
)
|
|
141
147
|
except Exception as err:
|
|
142
148
|
logger.exception(f'Backup {entry.path} {err.__class__.__name__}: {err}')
|
|
@@ -145,12 +151,12 @@ def rebuild(
|
|
|
145
151
|
now = time.monotonic()
|
|
146
152
|
if now >= next_update:
|
|
147
153
|
progress.update(
|
|
148
|
-
completed_file_count=rebuild_result.process_count,
|
|
154
|
+
completed_file_count=rebuild_result.process_count, advance_size=rebuild_result.process_size
|
|
149
155
|
)
|
|
150
156
|
next_update = now + 0.5
|
|
151
157
|
|
|
152
158
|
# Finalize progress indicator values:
|
|
153
|
-
progress.update(completed_file_count=rebuild_result.process_count,
|
|
159
|
+
progress.update(completed_file_count=rebuild_result.process_count, advance_size=rebuild_result.process_size)
|
|
154
160
|
|
|
155
161
|
summary_file = backup_root / f'{timestamp}-rebuild-summary.txt'
|
|
156
162
|
with TeeStdoutContext(summary_file):
|