PyHardLinkBackup 1.5.0__tar.gz → 1.7.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {pyhardlinkbackup-1.5.0 → pyhardlinkbackup-1.7.0}/PKG-INFO +83 -19
- {pyhardlinkbackup-1.5.0 → pyhardlinkbackup-1.7.0}/PyHardLinkBackup/__init__.py +1 -1
- {pyhardlinkbackup-1.5.0 → pyhardlinkbackup-1.7.0}/PyHardLinkBackup/backup.py +66 -58
- {pyhardlinkbackup-1.5.0 → pyhardlinkbackup-1.7.0}/PyHardLinkBackup/compare_backup.py +13 -6
- {pyhardlinkbackup-1.5.0 → pyhardlinkbackup-1.7.0}/PyHardLinkBackup/constants.py +4 -2
- {pyhardlinkbackup-1.5.0 → pyhardlinkbackup-1.7.0}/PyHardLinkBackup/rebuild_databases.py +10 -4
- pyhardlinkbackup-1.7.0/PyHardLinkBackup/tests/test_backup.py +935 -0
- pyhardlinkbackup-1.7.0/PyHardLinkBackup/tests/test_compare_backup.py +165 -0
- {pyhardlinkbackup-1.5.0 → pyhardlinkbackup-1.7.0}/PyHardLinkBackup/tests/test_rebuild_database.py +3 -5
- {pyhardlinkbackup-1.5.0 → pyhardlinkbackup-1.7.0}/PyHardLinkBackup/utilities/filesystem.py +42 -11
- pyhardlinkbackup-1.7.0/PyHardLinkBackup/utilities/rich_utils.py +248 -0
- {pyhardlinkbackup-1.5.0 → pyhardlinkbackup-1.7.0}/PyHardLinkBackup/utilities/tests/test_filesystem.py +11 -13
- pyhardlinkbackup-1.7.0/PyHardLinkBackup/utilities/tests/unittest_utilities.py +78 -0
- {pyhardlinkbackup-1.5.0 → pyhardlinkbackup-1.7.0}/README.md +82 -18
- {pyhardlinkbackup-1.5.0 → pyhardlinkbackup-1.7.0}/docs/README.md +2 -2
- pyhardlinkbackup-1.5.0/PyHardLinkBackup/tests/test_backup.py +0 -628
- pyhardlinkbackup-1.5.0/PyHardLinkBackup/tests/test_compare_backup.py +0 -86
- pyhardlinkbackup-1.5.0/PyHardLinkBackup/utilities/rich_utils.py +0 -99
- {pyhardlinkbackup-1.5.0 → pyhardlinkbackup-1.7.0}/.editorconfig +0 -0
- {pyhardlinkbackup-1.5.0 → pyhardlinkbackup-1.7.0}/.github/workflows/tests.yml +0 -0
- {pyhardlinkbackup-1.5.0 → pyhardlinkbackup-1.7.0}/.gitignore +0 -0
- {pyhardlinkbackup-1.5.0 → pyhardlinkbackup-1.7.0}/.idea/.gitignore +0 -0
- {pyhardlinkbackup-1.5.0 → pyhardlinkbackup-1.7.0}/.pre-commit-config.yaml +0 -0
- {pyhardlinkbackup-1.5.0 → pyhardlinkbackup-1.7.0}/.pre-commit-hooks.yaml +0 -0
- {pyhardlinkbackup-1.5.0 → pyhardlinkbackup-1.7.0}/.run/Template Python tests.run.xml +0 -0
- {pyhardlinkbackup-1.5.0 → pyhardlinkbackup-1.7.0}/.run/Unittests - __all__.run.xml +0 -0
- {pyhardlinkbackup-1.5.0 → pyhardlinkbackup-1.7.0}/.run/cli.py --help.run.xml +0 -0
- {pyhardlinkbackup-1.5.0 → pyhardlinkbackup-1.7.0}/.run/dev-cli update.run.xml +0 -0
- {pyhardlinkbackup-1.5.0 → pyhardlinkbackup-1.7.0}/.run/only DocTests.run.xml +0 -0
- {pyhardlinkbackup-1.5.0 → pyhardlinkbackup-1.7.0}/.run/only DocWrite.run.xml +0 -0
- {pyhardlinkbackup-1.5.0 → pyhardlinkbackup-1.7.0}/.venv-app/lib/python3.12/site-packages/cli_base/tests/shell_complete_snapshots/.gitignore +0 -0
- {pyhardlinkbackup-1.5.0 → pyhardlinkbackup-1.7.0}/PyHardLinkBackup/__main__.py +0 -0
- {pyhardlinkbackup-1.5.0 → pyhardlinkbackup-1.7.0}/PyHardLinkBackup/cli_app/__init__.py +0 -0
- {pyhardlinkbackup-1.5.0 → pyhardlinkbackup-1.7.0}/PyHardLinkBackup/cli_app/phlb.py +0 -0
- {pyhardlinkbackup-1.5.0 → pyhardlinkbackup-1.7.0}/PyHardLinkBackup/cli_dev/__init__.py +0 -0
- {pyhardlinkbackup-1.5.0 → pyhardlinkbackup-1.7.0}/PyHardLinkBackup/cli_dev/benchmark.py +0 -0
- {pyhardlinkbackup-1.5.0 → pyhardlinkbackup-1.7.0}/PyHardLinkBackup/cli_dev/code_style.py +0 -0
- {pyhardlinkbackup-1.5.0 → pyhardlinkbackup-1.7.0}/PyHardLinkBackup/cli_dev/packaging.py +0 -0
- {pyhardlinkbackup-1.5.0 → pyhardlinkbackup-1.7.0}/PyHardLinkBackup/cli_dev/shell_completion.py +0 -0
- {pyhardlinkbackup-1.5.0 → pyhardlinkbackup-1.7.0}/PyHardLinkBackup/cli_dev/testing.py +0 -0
- {pyhardlinkbackup-1.5.0 → pyhardlinkbackup-1.7.0}/PyHardLinkBackup/cli_dev/update_readme_history.py +0 -0
- {pyhardlinkbackup-1.5.0 → pyhardlinkbackup-1.7.0}/PyHardLinkBackup/logging_setup.py +0 -0
- {pyhardlinkbackup-1.5.0 → pyhardlinkbackup-1.7.0}/PyHardLinkBackup/tests/__init__.py +0 -0
- {pyhardlinkbackup-1.5.0 → pyhardlinkbackup-1.7.0}/PyHardLinkBackup/tests/test_doc_write.py +0 -0
- {pyhardlinkbackup-1.5.0 → pyhardlinkbackup-1.7.0}/PyHardLinkBackup/tests/test_doctests.py +0 -0
- {pyhardlinkbackup-1.5.0 → pyhardlinkbackup-1.7.0}/PyHardLinkBackup/tests/test_project_setup.py +0 -0
- {pyhardlinkbackup-1.5.0 → pyhardlinkbackup-1.7.0}/PyHardLinkBackup/tests/test_readme.py +0 -0
- {pyhardlinkbackup-1.5.0 → pyhardlinkbackup-1.7.0}/PyHardLinkBackup/tests/test_readme_history.py +0 -0
- {pyhardlinkbackup-1.5.0 → pyhardlinkbackup-1.7.0}/PyHardLinkBackup/utilities/__init__.py +0 -0
- {pyhardlinkbackup-1.5.0 → pyhardlinkbackup-1.7.0}/PyHardLinkBackup/utilities/file_hash_database.py +0 -0
- {pyhardlinkbackup-1.5.0 → pyhardlinkbackup-1.7.0}/PyHardLinkBackup/utilities/file_size_database.py +0 -0
- {pyhardlinkbackup-1.5.0 → pyhardlinkbackup-1.7.0}/PyHardLinkBackup/utilities/humanize.py +0 -0
- {pyhardlinkbackup-1.5.0 → pyhardlinkbackup-1.7.0}/PyHardLinkBackup/utilities/sha256sums.py +0 -0
- {pyhardlinkbackup-1.5.0 → pyhardlinkbackup-1.7.0}/PyHardLinkBackup/utilities/tee.py +0 -0
- {pyhardlinkbackup-1.5.0 → pyhardlinkbackup-1.7.0}/PyHardLinkBackup/utilities/tests/__init__.py +0 -0
- {pyhardlinkbackup-1.5.0 → pyhardlinkbackup-1.7.0}/PyHardLinkBackup/utilities/tests/test_file_hash_database.py +0 -0
- {pyhardlinkbackup-1.5.0 → pyhardlinkbackup-1.7.0}/PyHardLinkBackup/utilities/tests/test_file_size_database.py +0 -0
- {pyhardlinkbackup-1.5.0 → pyhardlinkbackup-1.7.0}/PyHardLinkBackup/utilities/tyro_cli_shared_args.py +0 -0
- {pyhardlinkbackup-1.5.0 → pyhardlinkbackup-1.7.0}/cli.py +0 -0
- {pyhardlinkbackup-1.5.0 → pyhardlinkbackup-1.7.0}/dev-cli.py +0 -0
- {pyhardlinkbackup-1.5.0 → pyhardlinkbackup-1.7.0}/dist/.gitignore +0 -0
- {pyhardlinkbackup-1.5.0 → pyhardlinkbackup-1.7.0}/docs/about-docs.md +0 -0
- {pyhardlinkbackup-1.5.0 → pyhardlinkbackup-1.7.0}/noxfile.py +0 -0
- {pyhardlinkbackup-1.5.0 → pyhardlinkbackup-1.7.0}/pyproject.toml +0 -0
- {pyhardlinkbackup-1.5.0 → pyhardlinkbackup-1.7.0}/uv.lock +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: PyHardLinkBackup
|
|
3
|
-
Version: 1.5.0
|
|
3
|
+
Version: 1.7.0
|
|
4
4
|
Summary: HardLink/Deduplication Backups with Python
|
|
5
5
|
Project-URL: Documentation, https://github.com/jedie/PyHardLinkBackup
|
|
6
6
|
Project-URL: Source, https://github.com/jedie/PyHardLinkBackup
|
|
@@ -21,11 +21,20 @@ Description-Content-Type: text/markdown
|
|
|
21
21
|
[](https://github.com/jedie/PyHardLinkBackup/blob/main/pyproject.toml)
|
|
22
22
|
[](https://github.com/jedie/PyHardLinkBackup/blob/main/LICENSE)
|
|
23
23
|
|
|
24
|
-
|
|
24
|
+
PyHardLinkBackup is a cross-platform backup tool designed for efficient, reliable, and accessible backups.
|
|
25
|
+
Similar to `rsync --link-dest`, but with global deduplication across all backups and all paths, not just between two directories.
|
|
25
26
|
|
|
26
|
-
|
|
27
|
+
Some aspects:
|
|
27
28
|
|
|
28
|
-
|
|
29
|
+
- Creates deduplicated, versioned backups using hardlinks, minimizing storage usage by linking identical files across all backup snapshots.
|
|
30
|
+
- Employs a global deduplication database (by file size and SHA256 hash) per backup root, ensuring that duplicate files are detected and hardlinked even if they are moved or renamed between backups.
|
|
31
|
+
- Backups are stored as regular files and directories—no proprietary formats—so you can access your data directly without special tools.
|
|
32
|
+
- Deleting old snapshots does not affect the integrity of remaining backups.
|
|
33
|
+
- Linux and macOS are fully supported (Windows support is experimental).
|
|
34
|
+
|
|
35
|
+
Limitations:
|
|
36
|
+
|
|
37
|
+
- Requires a filesystem that supports hardlinks (e.g., btrfs, zfs, ext4, APFS, NTFS with limitations).
|
|
29
38
|
|
|
30
39
|
## installation
|
|
31
40
|
|
|
@@ -38,7 +47,16 @@ pipx install PyHardLinkBackup
|
|
|
38
47
|
```
|
|
39
48
|
|
|
40
49
|
After this you can call the CLI via `phlb` command.
|
|
41
|
-
The main command is `phlb backup <source> <destination>`
|
|
50
|
+
The main command is `phlb backup <source> <destination>` to create a backup.
|
|
51
|
+
|
|
52
|
+
e.g.:
|
|
53
|
+
|
|
54
|
+
```bash
|
|
55
|
+
phlb backup /path/to/source /path/to/destination
|
|
56
|
+
```
|
|
57
|
+
|
|
58
|
+
This will create a snapshot in `/path/to/destination` using hard links for deduplication. You can safely delete old snapshots without affecting others.
|
|
59
|
+
|
|
42
60
|
|
|
43
61
|
[comment]: <> (✂✂✂ auto generated backup help start ✂✂✂)
|
|
44
62
|
```
|
|
@@ -63,20 +81,59 @@ Backup the source directory to the destination directory using hard links for de
|
|
|
63
81
|
[comment]: <> (✂✂✂ auto generated backup help end ✂✂✂)
|
|
64
82
|
|
|
65
83
|
|
|
84
|
+
## Screenshots
|
|
85
|
+
### Screenshot - running a backup
|
|
86
|
+
|
|
87
|
+
----
|
|
88
|
+
|
|
89
|
+

|
|
90
|
+
|
|
91
|
+
----
|
|
66
92
|
|
|
67
|
-
|
|
93
|
+
### Screenshot - backup finished
|
|
68
94
|
|
|
69
|
-
|
|
95
|
+
----
|
|
70
96
|
|
|
97
|
+

|
|
71
98
|
|
|
99
|
+
----
|
|
100
|
+
|
|
101
|
+
(more screenshots here: [jedie.github.io/tree/main/screenshots/PyHardLinkBackup](https://github.com/jedie/jedie.github.io/tree/main/screenshots/PyHardLinkBackup))
|
|
102
|
+
|
|
103
|
+
|
|
104
|
+
### update
|
|
105
|
+
|
|
106
|
+
If you use pipx, just call:
|
|
107
|
+
```bash
|
|
108
|
+
pipx upgrade PyHardLinkBackup
|
|
109
|
+
```
|
|
110
|
+
see: https://pipx.pypa.io/stable/docs/#pipx-upgrade
|
|
72
111
|
|
|
73
|
-
If it's finished it display a summary:
|
|
74
112
|
|
|
75
|
-
|
|
113
|
+
### Troubleshooting
|
|
76
114
|
|
|
115
|
+
- **Permission Errors:** Ensure you have read access to source and write access to destination.
|
|
116
|
+
- **Hardlink Limits:** Some filesystems (e.g., NTFS) have limits on the number of hardlinks per file.
|
|
117
|
+
- **Symlink Handling:** Broken symlinks are handled gracefully; see logs for details.
|
|
118
|
+
- **Backup Deletion:** Deleting a snapshot does not affect deduplication of other backups.
|
|
119
|
+
- **Log Files:** Check the log file in each backup directory for error details.
|
|
77
120
|
|
|
78
121
|
|
|
79
|
-
|
|
122
|
+
To lower the priority of the backup process (useful to reduce system impact during heavy backups), you can use `nice` and `ionice` on Linux systems:
|
|
123
|
+
|
|
124
|
+
```bash
|
|
125
|
+
nice -n 19 ionice -c3 phlb backup /path/to/source /path/to/destination
|
|
126
|
+
```
|
|
127
|
+
- `nice -n 19` sets the lowest CPU priority.
|
|
128
|
+
- `ionice -c3` sets the lowest I/O priority (idle class).
|
|
129
|
+
|
|
130
|
+
Adjust priority of an already running backup:
|
|
131
|
+
```bash
|
|
132
|
+
renice 19 -p $(pgrep phlb) && ionice -c3 -p $(pgrep phlb)
|
|
133
|
+
```
|
|
134
|
+
|
|
135
|
+
|
|
136
|
+
### complete help for main CLI app
|
|
80
137
|
|
|
81
138
|
[comment]: <> (✂✂✂ auto generated main help start ✂✂✂)
|
|
82
139
|
```
|
|
@@ -99,13 +156,7 @@ usage: phlb [-h] {backup,compare,rebuild,version}
|
|
|
99
156
|
[comment]: <> (✂✂✂ auto generated main help end ✂✂✂)
|
|
100
157
|
|
|
101
158
|
|
|
102
|
-
### update
|
|
103
159
|
|
|
104
|
-
If you use pipx, just call:
|
|
105
|
-
```bash
|
|
106
|
-
pipx upgrade PyHardLinkBackup
|
|
107
|
-
```
|
|
108
|
-
see: https://pipx.pypa.io/stable/docs/#pipx-upgrade
|
|
109
160
|
|
|
110
161
|
|
|
111
162
|
## concept
|
|
@@ -232,10 +283,26 @@ Overview of main changes:
|
|
|
232
283
|
|
|
233
284
|
[comment]: <> (✂✂✂ auto generated history start ✂✂✂)
|
|
234
285
|
|
|
286
|
+
* [v1.7.0](https://github.com/jedie/PyHardLinkBackup/compare/v1.6.0...v1.7.0)
|
|
287
|
+
* 2026-01-19 - Speedup and enhance unittest
|
|
288
|
+
* 2026-01-17 - Remove unfinished copied files on errors
|
|
289
|
+
* 2026-01-17 - Display/update progress on very large files #75 and enhance all bars
|
|
290
|
+
* 2026-01-18 - Expand tests: Check file open calls
|
|
291
|
+
* 2026-01-17 - expand tests
|
|
292
|
+
* 2026-01-17 - simplify tests
|
|
293
|
+
* 2026-01-17 - Warn if broken symlink found
|
|
294
|
+
* 2026-01-17 - Update README
|
|
295
|
+
* [v1.6.0](https://github.com/jedie/PyHardLinkBackup/compare/v1.5.0...v1.6.0)
|
|
296
|
+
* 2026-01-17 - Fix flaky test, because of terminal size
|
|
297
|
+
* 2026-01-17 - Bugfix: Don't hash new large files twice
|
|
298
|
+
* 2026-01-17 - Use compare also in backup tests
|
|
235
299
|
* [v1.5.0](https://github.com/jedie/PyHardLinkBackup/compare/v1.4.1...v1.5.0)
|
|
236
300
|
* 2026-01-17 - NEW: Compare command to verify source tree with last backup
|
|
237
301
|
* [v1.4.1](https://github.com/jedie/PyHardLinkBackup/compare/v1.4.0...v1.4.1)
|
|
238
302
|
* 2026-01-16 - Bugfix large file handling
|
|
303
|
+
|
|
304
|
+
<details><summary>Expand older history entries ...</summary>
|
|
305
|
+
|
|
239
306
|
* [v1.4.0](https://github.com/jedie/PyHardLinkBackup/compare/v1.3.0...v1.4.0)
|
|
240
307
|
* 2026-01-16 - Create log file in backup and a summary.txt
|
|
241
308
|
* 2026-01-16 - Run CI tests on macos, too.
|
|
@@ -246,9 +313,6 @@ Overview of main changes:
|
|
|
246
313
|
* 2026-01-15 - Add tests for rebuild
|
|
247
314
|
* 2026-01-15 - Add command to "rebuild" the size and hash filesystem database
|
|
248
315
|
* 2026-01-15 - Add screenshots in the README
|
|
249
|
-
|
|
250
|
-
<details><summary>Expand older history entries ...</summary>
|
|
251
|
-
|
|
252
316
|
* [v1.2.0](https://github.com/jedie/PyHardLinkBackup/compare/v1.1.0...v1.2.0)
|
|
253
317
|
* 2026-01-15 - Add error handling: Log exception but continue with the backup
|
|
254
318
|
* 2026-01-15 - Check permission and hardlink support on destination path
|
|
@@ -14,6 +14,7 @@ from PyHardLinkBackup.logging_setup import LoggingManager
|
|
|
14
14
|
from PyHardLinkBackup.utilities.file_hash_database import FileHashDatabase
|
|
15
15
|
from PyHardLinkBackup.utilities.file_size_database import FileSizeDatabase
|
|
16
16
|
from PyHardLinkBackup.utilities.filesystem import (
|
|
17
|
+
RemoveFileOnError,
|
|
17
18
|
copy_and_hash,
|
|
18
19
|
hash_file,
|
|
19
20
|
humanized_fs_scan,
|
|
@@ -59,6 +60,7 @@ def backup_one_file(
|
|
|
59
60
|
hash_db: FileHashDatabase,
|
|
60
61
|
backup_dir: Path,
|
|
61
62
|
backup_result: BackupResult,
|
|
63
|
+
progress: DisplayFileTreeProgress,
|
|
62
64
|
) -> None:
|
|
63
65
|
backup_result.backup_count += 1
|
|
64
66
|
src_path = Path(entry.path)
|
|
@@ -70,8 +72,8 @@ def backup_one_file(
|
|
|
70
72
|
|
|
71
73
|
try:
|
|
72
74
|
size = entry.stat().st_size
|
|
73
|
-
except FileNotFoundError:
|
|
74
|
-
|
|
75
|
+
except FileNotFoundError as err:
|
|
76
|
+
logger.warning(f'Broken symlink {src_path}: {err.__class__.__name__}: {err}')
|
|
75
77
|
target = os.readlink(src_path)
|
|
76
78
|
dst_path.symlink_to(target)
|
|
77
79
|
backup_result.symlink_files += 1
|
|
@@ -95,65 +97,66 @@ def backup_one_file(
|
|
|
95
97
|
# Process regular files
|
|
96
98
|
assert entry.is_file(follow_symlinks=False), f'Unexpected non-file: {src_path}'
|
|
97
99
|
|
|
98
|
-
|
|
100
|
+
with RemoveFileOnError(dst_path):
|
|
101
|
+
# Deduplication logic
|
|
102
|
+
|
|
103
|
+
if size < size_db.MIN_SIZE:
|
|
104
|
+
# Small file -> always copy without deduplication
|
|
105
|
+
logger.info('Copy small file: %s to %s', src_path, dst_path)
|
|
106
|
+
file_hash = copy_and_hash(src_path, dst_path, progress=progress, total_size=size)
|
|
107
|
+
backup_result.copied_files += 1
|
|
108
|
+
backup_result.copied_size += size
|
|
109
|
+
backup_result.copied_small_files += 1
|
|
110
|
+
backup_result.copied_small_size += size
|
|
111
|
+
store_hash(dst_path, file_hash)
|
|
112
|
+
return
|
|
113
|
+
|
|
114
|
+
if size in size_db:
|
|
115
|
+
logger.debug('File with size %iBytes found before -> hash: %s', size, src_path)
|
|
116
|
+
|
|
117
|
+
if size <= CHUNK_SIZE:
|
|
118
|
+
# File can be read complete into memory
|
|
119
|
+
logger.debug('File size %iBytes <= CHUNK_SIZE (%iBytes) -> read complete into memory', size, CHUNK_SIZE)
|
|
120
|
+
file_content, file_hash = read_and_hash_file(src_path)
|
|
121
|
+
if existing_path := hash_db.get(file_hash):
|
|
122
|
+
logger.info('Hardlink duplicate file: %s to %s', dst_path, existing_path)
|
|
123
|
+
os.link(existing_path, dst_path)
|
|
124
|
+
backup_result.hardlinked_files += 1
|
|
125
|
+
backup_result.hardlinked_size += size
|
|
126
|
+
else:
|
|
127
|
+
logger.info('Store unique file: %s to %s', src_path, dst_path)
|
|
128
|
+
dst_path.write_bytes(file_content)
|
|
129
|
+
hash_db[file_hash] = dst_path
|
|
130
|
+
backup_result.copied_files += 1
|
|
131
|
+
backup_result.copied_size += size
|
|
99
132
|
|
|
100
|
-
if size < size_db.MIN_SIZE:
|
|
101
|
-
# Small file -> always copy without deduplication
|
|
102
|
-
logger.info('Copy small file: %s to %s', src_path, dst_path)
|
|
103
|
-
file_hash = copy_and_hash(src_path, dst_path)
|
|
104
|
-
backup_result.copied_files += 1
|
|
105
|
-
backup_result.copied_size += size
|
|
106
|
-
backup_result.copied_small_files += 1
|
|
107
|
-
backup_result.copied_small_size += size
|
|
108
|
-
store_hash(dst_path, file_hash)
|
|
109
|
-
return
|
|
110
|
-
|
|
111
|
-
if size in size_db:
|
|
112
|
-
logger.debug('File with size %iBytes found before -> hash: %s', size, src_path)
|
|
113
|
-
|
|
114
|
-
if size <= CHUNK_SIZE:
|
|
115
|
-
# File can be read complete into memory
|
|
116
|
-
logger.debug('File size %iBytes <= CHUNK_SIZE (%iBytes) -> read complete into memory', size, CHUNK_SIZE)
|
|
117
|
-
file_content, file_hash = read_and_hash_file(src_path)
|
|
118
|
-
if existing_path := hash_db.get(file_hash):
|
|
119
|
-
logger.info('Hardlink duplicate file: %s to %s', dst_path, existing_path)
|
|
120
|
-
os.link(existing_path, dst_path)
|
|
121
|
-
backup_result.hardlinked_files += 1
|
|
122
|
-
backup_result.hardlinked_size += size
|
|
123
133
|
else:
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
134
|
+
# Large file
|
|
135
|
+
file_hash = hash_file(src_path, progress=progress, total_size=size) # Calculate hash without copying
|
|
136
|
+
|
|
137
|
+
if existing_path := hash_db.get(file_hash):
|
|
138
|
+
logger.info('Hardlink duplicate file: %s to %s', dst_path, existing_path)
|
|
139
|
+
os.link(existing_path, dst_path)
|
|
140
|
+
backup_result.hardlinked_files += 1
|
|
141
|
+
backup_result.hardlinked_size += size
|
|
142
|
+
else:
|
|
143
|
+
logger.info('Copy unique file: %s to %s', src_path, dst_path)
|
|
144
|
+
shutil.copyfile(src_path, dst_path)
|
|
145
|
+
hash_db[file_hash] = dst_path
|
|
146
|
+
backup_result.copied_files += 1
|
|
147
|
+
backup_result.copied_size += size
|
|
148
|
+
|
|
149
|
+
# Keep original file metadata (permission bits, time stamps, and flags)
|
|
150
|
+
shutil.copystat(src_path, dst_path)
|
|
130
151
|
else:
|
|
131
|
-
#
|
|
132
|
-
file_hash =
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
backup_result.hardlinked_files += 1
|
|
138
|
-
backup_result.hardlinked_size += size
|
|
139
|
-
else:
|
|
140
|
-
logger.info('Copy unique file: %s to %s', src_path, dst_path)
|
|
141
|
-
file_hash = copy_and_hash(src_path, dst_path)
|
|
142
|
-
hash_db[file_hash] = dst_path
|
|
143
|
-
backup_result.copied_files += 1
|
|
144
|
-
backup_result.copied_size += size
|
|
145
|
-
|
|
146
|
-
# Keep original file metadata (permission bits, time stamps, and flags)
|
|
147
|
-
shutil.copystat(src_path, dst_path)
|
|
148
|
-
else:
|
|
149
|
-
# A file with this size not backuped before -> Can't be duplicate -> copy and hash
|
|
150
|
-
file_hash = copy_and_hash(src_path, dst_path)
|
|
151
|
-
size_db.add(size)
|
|
152
|
-
hash_db[file_hash] = dst_path
|
|
153
|
-
backup_result.copied_files += 1
|
|
154
|
-
backup_result.copied_size += size
|
|
152
|
+
# A file with this size not backuped before -> Can't be duplicate -> copy and hash
|
|
153
|
+
file_hash = copy_and_hash(src_path, dst_path, progress=progress, total_size=size)
|
|
154
|
+
size_db.add(size)
|
|
155
|
+
hash_db[file_hash] = dst_path
|
|
156
|
+
backup_result.copied_files += 1
|
|
157
|
+
backup_result.copied_size += size
|
|
155
158
|
|
|
156
|
-
|
|
159
|
+
store_hash(dst_path, file_hash)
|
|
157
160
|
|
|
158
161
|
|
|
159
162
|
def backup_tree(
|
|
@@ -205,7 +208,11 @@ def backup_tree(
|
|
|
205
208
|
|
|
206
209
|
print(f'\nBackup to {backup_dir}...\n')
|
|
207
210
|
|
|
208
|
-
with DisplayFileTreeProgress(
|
|
211
|
+
with DisplayFileTreeProgress(
|
|
212
|
+
description=f'Backup {src_root}...',
|
|
213
|
+
total_file_count=src_file_count,
|
|
214
|
+
total_size=src_total_size,
|
|
215
|
+
) as progress:
|
|
209
216
|
# "Databases" for deduplication
|
|
210
217
|
size_db = FileSizeDatabase(phlb_conf_dir)
|
|
211
218
|
hash_db = FileHashDatabase(backup_root, phlb_conf_dir)
|
|
@@ -222,6 +229,7 @@ def backup_tree(
|
|
|
222
229
|
hash_db=hash_db,
|
|
223
230
|
backup_dir=backup_dir,
|
|
224
231
|
backup_result=backup_result,
|
|
232
|
+
progress=progress,
|
|
225
233
|
)
|
|
226
234
|
except Exception as err:
|
|
227
235
|
logger.exception(f'Backup {entry.path} {err.__class__.__name__}: {err}')
|
|
@@ -26,6 +26,7 @@ logger = logging.getLogger(__name__)
|
|
|
26
26
|
|
|
27
27
|
@dataclasses.dataclass
|
|
28
28
|
class CompareResult:
|
|
29
|
+
last_timestamp: str
|
|
29
30
|
compare_dir: Path
|
|
30
31
|
log_file: Path
|
|
31
32
|
#
|
|
@@ -52,6 +53,7 @@ def compare_one_file(
|
|
|
52
53
|
hash_db: FileHashDatabase,
|
|
53
54
|
compare_dir: Path,
|
|
54
55
|
compare_result: CompareResult,
|
|
56
|
+
progress: DisplayFileTreeProgress,
|
|
55
57
|
) -> None:
|
|
56
58
|
src_size = entry.stat().st_size
|
|
57
59
|
|
|
@@ -79,8 +81,8 @@ def compare_one_file(
|
|
|
79
81
|
compare_result.file_size_missmatch += 1
|
|
80
82
|
return
|
|
81
83
|
|
|
82
|
-
src_hash = hash_file(src_path)
|
|
83
|
-
dst_hash = hash_file(dst_path)
|
|
84
|
+
src_hash = hash_file(src_path, progress=progress, total_size=src_size)
|
|
85
|
+
dst_hash = hash_file(dst_path, progress=progress, total_size=dst_size)
|
|
84
86
|
|
|
85
87
|
if src_hash != dst_hash:
|
|
86
88
|
logger.warning(
|
|
@@ -157,12 +159,16 @@ def compare_tree(
|
|
|
157
159
|
with PrintTimingContextManager('Filesystem scan completed in'):
|
|
158
160
|
src_file_count, src_total_size = humanized_fs_scan(src_root, excludes=excludes)
|
|
159
161
|
|
|
160
|
-
with DisplayFileTreeProgress(
|
|
162
|
+
with DisplayFileTreeProgress(
|
|
163
|
+
description=f'Compare {src_root}...',
|
|
164
|
+
total_file_count=src_file_count,
|
|
165
|
+
total_size=src_total_size,
|
|
166
|
+
) as progress:
|
|
161
167
|
# init "databases":
|
|
162
168
|
size_db = FileSizeDatabase(phlb_conf_dir)
|
|
163
169
|
hash_db = FileHashDatabase(backup_root, phlb_conf_dir)
|
|
164
170
|
|
|
165
|
-
compare_result = CompareResult(compare_dir=compare_dir, log_file=log_file)
|
|
171
|
+
compare_result = CompareResult(last_timestamp=last_timestamp, compare_dir=compare_dir, log_file=log_file)
|
|
166
172
|
|
|
167
173
|
next_update = 0
|
|
168
174
|
for entry in iter_scandir_files(src_root, excludes=excludes):
|
|
@@ -174,6 +180,7 @@ def compare_tree(
|
|
|
174
180
|
hash_db=hash_db,
|
|
175
181
|
compare_dir=compare_dir,
|
|
176
182
|
compare_result=compare_result,
|
|
183
|
+
progress=progress,
|
|
177
184
|
)
|
|
178
185
|
except Exception as err:
|
|
179
186
|
logger.exception(f'Compare {entry.path} {err.__class__.__name__}: {err}')
|
|
@@ -183,12 +190,12 @@ def compare_tree(
|
|
|
183
190
|
if now >= next_update:
|
|
184
191
|
progress.update(
|
|
185
192
|
completed_file_count=compare_result.total_file_count,
|
|
186
|
-
|
|
193
|
+
advance_size=compare_result.total_size,
|
|
187
194
|
)
|
|
188
195
|
next_update = now + 0.5
|
|
189
196
|
|
|
190
197
|
# Finalize progress indicator values:
|
|
191
|
-
progress.update(completed_file_count=compare_result.total_file_count,
|
|
198
|
+
progress.update(completed_file_count=compare_result.total_file_count, advance_size=compare_result.total_size)
|
|
192
199
|
|
|
193
200
|
summary_file = compare_main_dir / f'{now_timestamp}-summary.txt'
|
|
194
201
|
with TeeStdoutContext(summary_file):
|
|
@@ -11,6 +11,8 @@ BASE_PATH = Path(PyHardLinkBackup.__file__).parent
|
|
|
11
11
|
##########################################################################
|
|
12
12
|
# "Settings" for PyHardLinkBackup:
|
|
13
13
|
|
|
14
|
-
CHUNK_SIZE = 64 * 1024 * 1024 # 64 MB
|
|
15
|
-
SMALL_FILE_THRESHOLD = 1000 # bytes
|
|
16
14
|
HASH_ALGO = 'sha256'
|
|
15
|
+
SMALL_FILE_THRESHOLD = 1000 # bytes
|
|
16
|
+
CHUNK_SIZE = 64 * 1024 * 1024 # 64 MB
|
|
17
|
+
LAGE_FILE_PROGRESS_MIN_SIZE = CHUNK_SIZE * 3
|
|
18
|
+
|
|
@@ -41,6 +41,7 @@ def rebuild_one_file(
|
|
|
41
41
|
size_db: FileSizeDatabase,
|
|
42
42
|
hash_db: FileHashDatabase,
|
|
43
43
|
rebuild_result: RebuildResult,
|
|
44
|
+
progress: DisplayFileTreeProgress,
|
|
44
45
|
):
|
|
45
46
|
file_path = Path(entry.path)
|
|
46
47
|
|
|
@@ -62,7 +63,7 @@ def rebuild_one_file(
|
|
|
62
63
|
# Small files will never deduplicate, skip them
|
|
63
64
|
return
|
|
64
65
|
|
|
65
|
-
file_hash = hash_file(file_path)
|
|
66
|
+
file_hash = hash_file(file_path, progress=progress, total_size=size)
|
|
66
67
|
|
|
67
68
|
if size not in size_db:
|
|
68
69
|
size_db.add(size)
|
|
@@ -121,7 +122,11 @@ def rebuild(
|
|
|
121
122
|
file_count -= 1
|
|
122
123
|
total_size -= file.stat().st_size
|
|
123
124
|
|
|
124
|
-
with DisplayFileTreeProgress(
|
|
125
|
+
with DisplayFileTreeProgress(
|
|
126
|
+
description=f'Rebuild {backup_root}...',
|
|
127
|
+
total_file_count=file_count,
|
|
128
|
+
total_size=total_size,
|
|
129
|
+
) as progress:
|
|
125
130
|
# "Databases" for deduplication
|
|
126
131
|
size_db = FileSizeDatabase(phlb_conf_dir)
|
|
127
132
|
hash_db = FileHashDatabase(backup_root, phlb_conf_dir)
|
|
@@ -137,6 +142,7 @@ def rebuild(
|
|
|
137
142
|
size_db=size_db,
|
|
138
143
|
hash_db=hash_db,
|
|
139
144
|
rebuild_result=rebuild_result,
|
|
145
|
+
progress=progress,
|
|
140
146
|
)
|
|
141
147
|
except Exception as err:
|
|
142
148
|
logger.exception(f'Backup {entry.path} {err.__class__.__name__}: {err}')
|
|
@@ -145,12 +151,12 @@ def rebuild(
|
|
|
145
151
|
now = time.monotonic()
|
|
146
152
|
if now >= next_update:
|
|
147
153
|
progress.update(
|
|
148
|
-
completed_file_count=rebuild_result.process_count,
|
|
154
|
+
completed_file_count=rebuild_result.process_count, advance_size=rebuild_result.process_size
|
|
149
155
|
)
|
|
150
156
|
next_update = now + 0.5
|
|
151
157
|
|
|
152
158
|
# Finalize progress indicator values:
|
|
153
|
-
progress.update(completed_file_count=rebuild_result.process_count,
|
|
159
|
+
progress.update(completed_file_count=rebuild_result.process_count, advance_size=rebuild_result.process_size)
|
|
154
160
|
|
|
155
161
|
summary_file = backup_root / f'{timestamp}-rebuild-summary.txt'
|
|
156
162
|
with TeeStdoutContext(summary_file):
|