PyHardLinkBackup 1.6.0__tar.gz → 1.7.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (65) hide show
  1. {pyhardlinkbackup-1.6.0 → pyhardlinkbackup-1.7.1}/.pre-commit-config.yaml +1 -1
  2. pyhardlinkbackup-1.6.0/README.md → pyhardlinkbackup-1.7.1/PKG-INFO +95 -18
  3. {pyhardlinkbackup-1.6.0 → pyhardlinkbackup-1.7.1}/PyHardLinkBackup/__init__.py +1 -1
  4. {pyhardlinkbackup-1.6.0 → pyhardlinkbackup-1.7.1}/PyHardLinkBackup/backup.py +66 -58
  5. {pyhardlinkbackup-1.6.0 → pyhardlinkbackup-1.7.1}/PyHardLinkBackup/compare_backup.py +11 -5
  6. {pyhardlinkbackup-1.6.0 → pyhardlinkbackup-1.7.1}/PyHardLinkBackup/constants.py +4 -2
  7. {pyhardlinkbackup-1.6.0 → pyhardlinkbackup-1.7.1}/PyHardLinkBackup/rebuild_databases.py +10 -4
  8. pyhardlinkbackup-1.7.1/PyHardLinkBackup/tests/test_backup.py +935 -0
  9. pyhardlinkbackup-1.7.1/PyHardLinkBackup/tests/test_compare_backup.py +165 -0
  10. {pyhardlinkbackup-1.6.0 → pyhardlinkbackup-1.7.1}/PyHardLinkBackup/tests/test_rebuild_database.py +3 -5
  11. {pyhardlinkbackup-1.6.0 → pyhardlinkbackup-1.7.1}/PyHardLinkBackup/utilities/filesystem.py +42 -11
  12. pyhardlinkbackup-1.7.1/PyHardLinkBackup/utilities/rich_utils.py +248 -0
  13. {pyhardlinkbackup-1.6.0 → pyhardlinkbackup-1.7.1}/PyHardLinkBackup/utilities/tests/test_filesystem.py +11 -13
  14. pyhardlinkbackup-1.7.1/PyHardLinkBackup/utilities/tests/unittest_utilities.py +78 -0
  15. pyhardlinkbackup-1.6.0/PKG-INFO → pyhardlinkbackup-1.7.1/README.md +80 -33
  16. {pyhardlinkbackup-1.6.0 → pyhardlinkbackup-1.7.1}/docs/README.md +2 -2
  17. {pyhardlinkbackup-1.6.0 → pyhardlinkbackup-1.7.1}/pyproject.toml +1 -1
  18. {pyhardlinkbackup-1.6.0 → pyhardlinkbackup-1.7.1}/uv.lock +48 -48
  19. pyhardlinkbackup-1.6.0/PyHardLinkBackup/tests/test_backup.py +0 -706
  20. pyhardlinkbackup-1.6.0/PyHardLinkBackup/tests/test_compare_backup.py +0 -145
  21. pyhardlinkbackup-1.6.0/PyHardLinkBackup/utilities/rich_utils.py +0 -99
  22. {pyhardlinkbackup-1.6.0 → pyhardlinkbackup-1.7.1}/.editorconfig +0 -0
  23. {pyhardlinkbackup-1.6.0 → pyhardlinkbackup-1.7.1}/.github/workflows/tests.yml +0 -0
  24. {pyhardlinkbackup-1.6.0 → pyhardlinkbackup-1.7.1}/.gitignore +0 -0
  25. {pyhardlinkbackup-1.6.0 → pyhardlinkbackup-1.7.1}/.idea/.gitignore +0 -0
  26. {pyhardlinkbackup-1.6.0 → pyhardlinkbackup-1.7.1}/.pre-commit-hooks.yaml +0 -0
  27. {pyhardlinkbackup-1.6.0 → pyhardlinkbackup-1.7.1}/.run/Template Python tests.run.xml +0 -0
  28. {pyhardlinkbackup-1.6.0 → pyhardlinkbackup-1.7.1}/.run/Unittests - __all__.run.xml +0 -0
  29. {pyhardlinkbackup-1.6.0 → pyhardlinkbackup-1.7.1}/.run/cli.py --help.run.xml +0 -0
  30. {pyhardlinkbackup-1.6.0 → pyhardlinkbackup-1.7.1}/.run/dev-cli update.run.xml +0 -0
  31. {pyhardlinkbackup-1.6.0 → pyhardlinkbackup-1.7.1}/.run/only DocTests.run.xml +0 -0
  32. {pyhardlinkbackup-1.6.0 → pyhardlinkbackup-1.7.1}/.run/only DocWrite.run.xml +0 -0
  33. {pyhardlinkbackup-1.6.0 → pyhardlinkbackup-1.7.1}/.venv-app/lib/python3.12/site-packages/cli_base/tests/shell_complete_snapshots/.gitignore +0 -0
  34. {pyhardlinkbackup-1.6.0 → pyhardlinkbackup-1.7.1}/PyHardLinkBackup/__main__.py +0 -0
  35. {pyhardlinkbackup-1.6.0 → pyhardlinkbackup-1.7.1}/PyHardLinkBackup/cli_app/__init__.py +0 -0
  36. {pyhardlinkbackup-1.6.0 → pyhardlinkbackup-1.7.1}/PyHardLinkBackup/cli_app/phlb.py +0 -0
  37. {pyhardlinkbackup-1.6.0 → pyhardlinkbackup-1.7.1}/PyHardLinkBackup/cli_dev/__init__.py +0 -0
  38. {pyhardlinkbackup-1.6.0 → pyhardlinkbackup-1.7.1}/PyHardLinkBackup/cli_dev/benchmark.py +0 -0
  39. {pyhardlinkbackup-1.6.0 → pyhardlinkbackup-1.7.1}/PyHardLinkBackup/cli_dev/code_style.py +0 -0
  40. {pyhardlinkbackup-1.6.0 → pyhardlinkbackup-1.7.1}/PyHardLinkBackup/cli_dev/packaging.py +0 -0
  41. {pyhardlinkbackup-1.6.0 → pyhardlinkbackup-1.7.1}/PyHardLinkBackup/cli_dev/shell_completion.py +0 -0
  42. {pyhardlinkbackup-1.6.0 → pyhardlinkbackup-1.7.1}/PyHardLinkBackup/cli_dev/testing.py +0 -0
  43. {pyhardlinkbackup-1.6.0 → pyhardlinkbackup-1.7.1}/PyHardLinkBackup/cli_dev/update_readme_history.py +0 -0
  44. {pyhardlinkbackup-1.6.0 → pyhardlinkbackup-1.7.1}/PyHardLinkBackup/logging_setup.py +0 -0
  45. {pyhardlinkbackup-1.6.0 → pyhardlinkbackup-1.7.1}/PyHardLinkBackup/tests/__init__.py +0 -0
  46. {pyhardlinkbackup-1.6.0 → pyhardlinkbackup-1.7.1}/PyHardLinkBackup/tests/test_doc_write.py +0 -0
  47. {pyhardlinkbackup-1.6.0 → pyhardlinkbackup-1.7.1}/PyHardLinkBackup/tests/test_doctests.py +0 -0
  48. {pyhardlinkbackup-1.6.0 → pyhardlinkbackup-1.7.1}/PyHardLinkBackup/tests/test_project_setup.py +0 -0
  49. {pyhardlinkbackup-1.6.0 → pyhardlinkbackup-1.7.1}/PyHardLinkBackup/tests/test_readme.py +0 -0
  50. {pyhardlinkbackup-1.6.0 → pyhardlinkbackup-1.7.1}/PyHardLinkBackup/tests/test_readme_history.py +0 -0
  51. {pyhardlinkbackup-1.6.0 → pyhardlinkbackup-1.7.1}/PyHardLinkBackup/utilities/__init__.py +0 -0
  52. {pyhardlinkbackup-1.6.0 → pyhardlinkbackup-1.7.1}/PyHardLinkBackup/utilities/file_hash_database.py +0 -0
  53. {pyhardlinkbackup-1.6.0 → pyhardlinkbackup-1.7.1}/PyHardLinkBackup/utilities/file_size_database.py +0 -0
  54. {pyhardlinkbackup-1.6.0 → pyhardlinkbackup-1.7.1}/PyHardLinkBackup/utilities/humanize.py +0 -0
  55. {pyhardlinkbackup-1.6.0 → pyhardlinkbackup-1.7.1}/PyHardLinkBackup/utilities/sha256sums.py +0 -0
  56. {pyhardlinkbackup-1.6.0 → pyhardlinkbackup-1.7.1}/PyHardLinkBackup/utilities/tee.py +0 -0
  57. {pyhardlinkbackup-1.6.0 → pyhardlinkbackup-1.7.1}/PyHardLinkBackup/utilities/tests/__init__.py +0 -0
  58. {pyhardlinkbackup-1.6.0 → pyhardlinkbackup-1.7.1}/PyHardLinkBackup/utilities/tests/test_file_hash_database.py +0 -0
  59. {pyhardlinkbackup-1.6.0 → pyhardlinkbackup-1.7.1}/PyHardLinkBackup/utilities/tests/test_file_size_database.py +0 -0
  60. {pyhardlinkbackup-1.6.0 → pyhardlinkbackup-1.7.1}/PyHardLinkBackup/utilities/tyro_cli_shared_args.py +0 -0
  61. {pyhardlinkbackup-1.6.0 → pyhardlinkbackup-1.7.1}/cli.py +0 -0
  62. {pyhardlinkbackup-1.6.0 → pyhardlinkbackup-1.7.1}/dev-cli.py +0 -0
  63. {pyhardlinkbackup-1.6.0 → pyhardlinkbackup-1.7.1}/dist/.gitignore +0 -0
  64. {pyhardlinkbackup-1.6.0 → pyhardlinkbackup-1.7.1}/docs/about-docs.md +0 -0
  65. {pyhardlinkbackup-1.6.0 → pyhardlinkbackup-1.7.1}/noxfile.py +0 -0
@@ -2,6 +2,6 @@
2
2
  # See https://pre-commit.com for more information
3
3
  repos:
4
4
  - repo: https://github.com/jedie/cli-base-utilities
5
- rev: v0.27.0
5
+ rev: v0.27.1
6
6
  hooks:
7
7
  - id: update-readme-history
@@ -1,3 +1,18 @@
1
+ Metadata-Version: 2.4
2
+ Name: PyHardLinkBackup
3
+ Version: 1.7.1
4
+ Summary: HardLink/Deduplication Backups with Python
5
+ Project-URL: Documentation, https://github.com/jedie/PyHardLinkBackup
6
+ Project-URL: Source, https://github.com/jedie/PyHardLinkBackup
7
+ Author-email: Jens Diemer <PyHardLinkBackup@jensdiemer.de>
8
+ License: GPL-3.0-or-later
9
+ Requires-Python: >=3.12
10
+ Requires-Dist: bx-py-utils
11
+ Requires-Dist: cli-base-utilities>=0.27.1
12
+ Requires-Dist: rich
13
+ Requires-Dist: tyro
14
+ Description-Content-Type: text/markdown
15
+
1
16
  # PyHardLinkBackup
2
17
 
3
18
  [![tests](https://github.com/jedie/PyHardLinkBackup/actions/workflows/tests.yml/badge.svg?branch=main)](https://github.com/jedie/PyHardLinkBackup/actions/workflows/tests.yml)
@@ -6,11 +21,20 @@
6
21
  [![Python Versions](https://img.shields.io/pypi/pyversions/PyHardLinkBackup)](https://github.com/jedie/PyHardLinkBackup/blob/main/pyproject.toml)
7
22
  [![License GPL-3.0-or-later](https://img.shields.io/pypi/l/PyHardLinkBackup)](https://github.com/jedie/PyHardLinkBackup/blob/main/LICENSE)
8
23
 
9
- HardLink/Deduplication Backups with Python
24
+ PyHardLinkBackup is a cross-platform backup tool designed for efficient, reliable, and accessible backups.
25
+ Similar to `rsync --link-dest`, but with global deduplication across all backups and all paths, not just between two directories.
26
+
27
+ Some aspects:
28
+
29
+ - Creates deduplicated, versioned backups using hardlinks, minimizing storage usage by linking identical files across all backup snapshots.
30
+ - Employs a global deduplication database (by file size and SHA256 hash) per backup root, ensuring that duplicate files are detected and hardlinked even if they are moved or renamed between backups.
31
+ - Backups are stored as regular files and directories—no proprietary formats—so you can access your data directly without special tools.
32
+ - Deleting old snapshots does not affect the integrity of remaining backups.
33
+ - Linux and macOS are fully supported (Windows support is experimental)
10
34
 
11
- **WIP:** v1.0.0 is a complete rewrite of PyHardLinkBackup.
35
+ Limitations:
12
36
 
13
- It's similar to `rsync --link-dest` but the deduplication is done globally for all backups and all paths.
37
+ - Requires a filesystem that supports hardlinks (e.g., btrfs, zfs, ext4, APFS, NTFS with limitations).
14
38
 
15
39
  ## installation
16
40
 
@@ -23,7 +47,16 @@ pipx install PyHardLinkBackup
23
47
  ```
24
48
 
25
49
  After this you can call the CLI via `phlb` command.
26
- The main command is `phlb backup <source> <destination>`:
50
+ The main command is `phlb backup <source> <destination>` to create a backup.
51
+
52
+ e.g.:
53
+
54
+ ```bash
55
+ phlb backup /path/to/source /path/to/destination
56
+ ```
57
+
58
+ This will create a snapshot in `/path/to/destination` using hard links for deduplication. You can safely delete old snapshots without affecting others.
59
+
27
60
 
28
61
  [comment]: <> (✂✂✂ auto generated backup help start ✂✂✂)
29
62
  ```
@@ -48,20 +81,59 @@ Backup the source directory to the destination directory using hard links for de
48
81
  [comment]: <> (✂✂✂ auto generated backup help end ✂✂✂)
49
82
 
50
83
 
84
+ ## Screenshots
85
+ ### Screenshot - running a backup
86
+
87
+ ----
88
+
89
+ ![2026-01-19_phlb1.png](https://raw.githubusercontent.com/jedie/jedie.github.io/main/screenshots/PyHardLinkBackup/2026-01-19_phlb1.png "2026-01-19_phlb1.png")
90
+
91
+ ----
92
+
93
+ ### Screenshot - backup finished
94
+
95
+ ----
96
+
97
+ ![2026-01-19_phlb2.png](https://raw.githubusercontent.com/jedie/jedie.github.io/main/screenshots/PyHardLinkBackup/2026-01-19_phlb2.png "2026-01-19_phlb2.png")
98
+
99
+ ----
100
+
101
+ (more screenshots here: [jedie.github.io/tree/main/screenshots/PyHardLinkBackup](https://github.com/jedie/jedie.github.io/tree/main/screenshots/PyHardLinkBackup))
102
+
103
+
104
+ ### update
105
+
106
+ If you use pipx, just call:
107
+ ```bash
108
+ pipx upgrade PyHardLinkBackup
109
+ ```
110
+ see: https://pipx.pypa.io/stable/docs/#pipx-upgrade
51
111
 
52
- Running a backup looks like:
53
112
 
54
- ![2026-01-15-phlb1.png](https://raw.githubusercontent.com/jedie/jedie.github.io/main/screenshots/PyHardLinkBackup/2026-01-15-phlb1.png "2026-01-15-phlb1.png")
113
+ ### Troubleshooting
55
114
 
115
+ - **Permission Errors:** Ensure you have read access to source and write access to destination.
116
+ - **Hardlink Limits:** Some filesystems (e.g., NTFS) have limits on the number of hardlinks per file.
117
+ - **Symlink Handling:** Broken symlinks are handled gracefully; see logs for details.
118
+ - **Backup Deletion:** Deleting a snapshot does not affect deduplication of other backups.
119
+ - **Log Files:** Check the log file in each backup directory for error details.
56
120
 
57
121
 
58
- If it's finished it display a summary:
122
+ To lower the priority of the backup process (useful to reduce system impact during heavy backups), you can use `nice` and `ionice` on Linux systems:
59
123
 
60
- ![2026-01-15-phlb2.png](https://raw.githubusercontent.com/jedie/jedie.github.io/main/screenshots/PyHardLinkBackup/2026-01-15-phlb2.png "2026-01-15-phlb2.png")
124
+ ```bash
125
+ nice -n 19 ionice -c3 phlb backup /path/to/source /path/to/destination
126
+ ```
127
+ - `nice -n 19` sets the lowest CPU priority.
128
+ - `ionice -c3` sets the lowest I/O priority (idle class).
61
129
 
130
+ Adjust priority of an already running backup:
131
+ ```bash
132
+ renice 19 -p $(pgrep phlb) && ionice -c3 -p $(pgrep phlb)
133
+ ```
62
134
 
63
135
 
64
- complete help for main CLI app:
136
+ ### complete help for main CLI app
65
137
 
66
138
  [comment]: <> (✂✂✂ auto generated main help start ✂✂✂)
67
139
  ```
@@ -84,13 +156,7 @@ usage: phlb [-h] {backup,compare,rebuild,version}
84
156
  [comment]: <> (✂✂✂ auto generated main help end ✂✂✂)
85
157
 
86
158
 
87
- ### update
88
159
 
89
- If you use pipx, just call:
90
- ```bash
91
- pipx upgrade PyHardLinkBackup
92
- ```
93
- see: https://pipx.pypa.io/stable/docs/#pipx-upgrade
94
160
 
95
161
 
96
162
  ## concept
@@ -217,21 +283,32 @@ Overview of main changes:
217
283
 
218
284
  [comment]: <> (✂✂✂ auto generated history start ✂✂✂)
219
285
 
286
+ * [v1.7.1](https://github.com/jedie/PyHardLinkBackup/compare/v1.7.0...v1.7.1)
287
+ * 2026-01-19 - Update requirements to fix problems under Windows
288
+ * [v1.7.0](https://github.com/jedie/PyHardLinkBackup/compare/v1.6.0...v1.7.0)
289
+ * 2026-01-19 - Speedup and enhance unittest
290
+ * 2026-01-17 - Remove unfinished copied files on errors
291
+ 2026-01-17 - Display/update progress on very large files #75 and enhance all bars
292
+ * 2026-01-18 - Expand tests: Check file open calls
293
+ * 2026-01-17 - expand tests
294
+ * 2026-01-17 - simplify tests
295
+ * 2026-01-17 - Warn if broken symlink found
296
+ * 2026-01-17 - Update README
220
297
  * [v1.6.0](https://github.com/jedie/PyHardLinkBackup/compare/v1.5.0...v1.6.0)
221
298
  * 2026-01-17 - Fix flaky test, because of terminal size
222
299
  * 2026-01-17 - Bugfix: Don't hash new large files twice
223
300
  * 2026-01-17 - Use compare also in backup tests
224
301
  * [v1.5.0](https://github.com/jedie/PyHardLinkBackup/compare/v1.4.1...v1.5.0)
225
302
  * 2026-01-17 - NEW: Compare command to verify source tree with last backup
303
+
304
+ <details><summary>Expand older history entries ...</summary>
305
+
226
306
  * [v1.4.1](https://github.com/jedie/PyHardLinkBackup/compare/v1.4.0...v1.4.1)
227
307
  * 2026-01-16 - Bugfix large file handling
228
308
  * [v1.4.0](https://github.com/jedie/PyHardLinkBackup/compare/v1.3.0...v1.4.0)
229
309
  * 2026-01-16 - Create log file in backup and a summary.txt
230
310
  * 2026-01-16 - Run CI tests on macos, too.
231
311
  * 2026-01-16 - add dev cli command "scan-benchmark"
232
-
233
- <details><summary>Expand older history entries ...</summary>
234
-
235
312
  * [v1.3.0](https://github.com/jedie/PyHardLinkBackup/compare/v1.2.0...v1.3.0)
236
313
  * 2026-01-15 - Verify SHA256SUMS files in "rebuild" command, too.
237
314
  * 2026-01-15 - Code cleanup: use more generic names for and in BackupProgress
@@ -3,5 +3,5 @@
3
3
  """
4
4
 
5
5
  # See https://packaging.python.org/en/latest/specifications/version-specifiers/
6
- __version__ = '1.6.0'
6
+ __version__ = '1.7.1'
7
7
  __author__ = 'Jens Diemer <PyHardLinkBackup@jensdiemer.de>'
@@ -14,6 +14,7 @@ from PyHardLinkBackup.logging_setup import LoggingManager
14
14
  from PyHardLinkBackup.utilities.file_hash_database import FileHashDatabase
15
15
  from PyHardLinkBackup.utilities.file_size_database import FileSizeDatabase
16
16
  from PyHardLinkBackup.utilities.filesystem import (
17
+ RemoveFileOnError,
17
18
  copy_and_hash,
18
19
  hash_file,
19
20
  humanized_fs_scan,
@@ -59,6 +60,7 @@ def backup_one_file(
59
60
  hash_db: FileHashDatabase,
60
61
  backup_dir: Path,
61
62
  backup_result: BackupResult,
63
+ progress: DisplayFileTreeProgress,
62
64
  ) -> None:
63
65
  backup_result.backup_count += 1
64
66
  src_path = Path(entry.path)
@@ -70,8 +72,8 @@ def backup_one_file(
70
72
 
71
73
  try:
72
74
  size = entry.stat().st_size
73
- except FileNotFoundError:
74
- # e.g.: Handle broken symlink
75
+ except FileNotFoundError as err:
76
+ logger.warning(f'Broken symlink {src_path}: {err.__class__.__name__}: {err}')
75
77
  target = os.readlink(src_path)
76
78
  dst_path.symlink_to(target)
77
79
  backup_result.symlink_files += 1
@@ -95,65 +97,66 @@ def backup_one_file(
95
97
  # Process regular files
96
98
  assert entry.is_file(follow_symlinks=False), f'Unexpected non-file: {src_path}'
97
99
 
98
- # Deduplication logic
100
+ with RemoveFileOnError(dst_path):
101
+ # Deduplication logic
102
+
103
+ if size < size_db.MIN_SIZE:
104
+ # Small file -> always copy without deduplication
105
+ logger.info('Copy small file: %s to %s', src_path, dst_path)
106
+ file_hash = copy_and_hash(src_path, dst_path, progress=progress, total_size=size)
107
+ backup_result.copied_files += 1
108
+ backup_result.copied_size += size
109
+ backup_result.copied_small_files += 1
110
+ backup_result.copied_small_size += size
111
+ store_hash(dst_path, file_hash)
112
+ return
113
+
114
+ if size in size_db:
115
+ logger.debug('File with size %iBytes found before -> hash: %s', size, src_path)
116
+
117
+ if size <= CHUNK_SIZE:
118
+ # File can be read complete into memory
119
+ logger.debug('File size %iBytes <= CHUNK_SIZE (%iBytes) -> read complete into memory', size, CHUNK_SIZE)
120
+ file_content, file_hash = read_and_hash_file(src_path)
121
+ if existing_path := hash_db.get(file_hash):
122
+ logger.info('Hardlink duplicate file: %s to %s', dst_path, existing_path)
123
+ os.link(existing_path, dst_path)
124
+ backup_result.hardlinked_files += 1
125
+ backup_result.hardlinked_size += size
126
+ else:
127
+ logger.info('Store unique file: %s to %s', src_path, dst_path)
128
+ dst_path.write_bytes(file_content)
129
+ hash_db[file_hash] = dst_path
130
+ backup_result.copied_files += 1
131
+ backup_result.copied_size += size
99
132
 
100
- if size < size_db.MIN_SIZE:
101
- # Small file -> always copy without deduplication
102
- logger.info('Copy small file: %s to %s', src_path, dst_path)
103
- file_hash = copy_and_hash(src_path, dst_path)
104
- backup_result.copied_files += 1
105
- backup_result.copied_size += size
106
- backup_result.copied_small_files += 1
107
- backup_result.copied_small_size += size
108
- store_hash(dst_path, file_hash)
109
- return
110
-
111
- if size in size_db:
112
- logger.debug('File with size %iBytes found before -> hash: %s', size, src_path)
113
-
114
- if size <= CHUNK_SIZE:
115
- # File can be read complete into memory
116
- logger.debug('File size %iBytes <= CHUNK_SIZE (%iBytes) -> read complete into memory', size, CHUNK_SIZE)
117
- file_content, file_hash = read_and_hash_file(src_path)
118
- if existing_path := hash_db.get(file_hash):
119
- logger.info('Hardlink duplicate file: %s to %s', dst_path, existing_path)
120
- os.link(existing_path, dst_path)
121
- backup_result.hardlinked_files += 1
122
- backup_result.hardlinked_size += size
123
133
  else:
124
- logger.info('Store unique file: %s to %s', src_path, dst_path)
125
- dst_path.write_bytes(file_content)
126
- hash_db[file_hash] = dst_path
127
- backup_result.copied_files += 1
128
- backup_result.copied_size += size
129
-
134
+ # Large file
135
+ file_hash = hash_file(src_path, progress=progress, total_size=size) # Calculate hash without copying
136
+
137
+ if existing_path := hash_db.get(file_hash):
138
+ logger.info('Hardlink duplicate file: %s to %s', dst_path, existing_path)
139
+ os.link(existing_path, dst_path)
140
+ backup_result.hardlinked_files += 1
141
+ backup_result.hardlinked_size += size
142
+ else:
143
+ logger.info('Copy unique file: %s to %s', src_path, dst_path)
144
+ shutil.copyfile(src_path, dst_path)
145
+ hash_db[file_hash] = dst_path
146
+ backup_result.copied_files += 1
147
+ backup_result.copied_size += size
148
+
149
+ # Keep original file metadata (permission bits, time stamps, and flags)
150
+ shutil.copystat(src_path, dst_path)
130
151
  else:
131
- # Large file
132
- file_hash = hash_file(src_path) # Calculate hash without copying
133
-
134
- if existing_path := hash_db.get(file_hash):
135
- logger.info('Hardlink duplicate file: %s to %s', dst_path, existing_path)
136
- os.link(existing_path, dst_path)
137
- backup_result.hardlinked_files += 1
138
- backup_result.hardlinked_size += size
139
- else:
140
- logger.info('Copy unique file: %s to %s', src_path, dst_path)
141
- shutil.copyfile(src_path, dst_path)
142
- hash_db[file_hash] = dst_path
143
- backup_result.copied_files += 1
144
- backup_result.copied_size += size
145
-
146
- # Keep original file metadata (permission bits, time stamps, and flags)
147
- shutil.copystat(src_path, dst_path)
148
- else:
149
- # A file with this size not backuped before -> Can't be duplicate -> copy and hash
150
- file_hash = copy_and_hash(src_path, dst_path)
151
- size_db.add(size)
152
- hash_db[file_hash] = dst_path
153
- backup_result.copied_files += 1
154
- backup_result.copied_size += size
152
+ # A file with this size not backuped before -> Can't be duplicate -> copy and hash
153
+ file_hash = copy_and_hash(src_path, dst_path, progress=progress, total_size=size)
154
+ size_db.add(size)
155
+ hash_db[file_hash] = dst_path
156
+ backup_result.copied_files += 1
157
+ backup_result.copied_size += size
155
158
 
156
- store_hash(dst_path, file_hash)
159
+ store_hash(dst_path, file_hash)
157
160
 
158
161
 
159
162
  def backup_tree(
@@ -205,7 +208,11 @@ def backup_tree(
205
208
 
206
209
  print(f'\nBackup to {backup_dir}...\n')
207
210
 
208
- with DisplayFileTreeProgress(src_file_count, src_total_size) as progress:
211
+ with DisplayFileTreeProgress(
212
+ description=f'Backup {src_root}...',
213
+ total_file_count=src_file_count,
214
+ total_size=src_total_size,
215
+ ) as progress:
209
216
  # "Databases" for deduplication
210
217
  size_db = FileSizeDatabase(phlb_conf_dir)
211
218
  hash_db = FileHashDatabase(backup_root, phlb_conf_dir)
@@ -222,6 +229,7 @@ def backup_tree(
222
229
  hash_db=hash_db,
223
230
  backup_dir=backup_dir,
224
231
  backup_result=backup_result,
232
+ progress=progress,
225
233
  )
226
234
  except Exception as err:
227
235
  logger.exception(f'Backup {entry.path} {err.__class__.__name__}: {err}')
@@ -53,6 +53,7 @@ def compare_one_file(
53
53
  hash_db: FileHashDatabase,
54
54
  compare_dir: Path,
55
55
  compare_result: CompareResult,
56
+ progress: DisplayFileTreeProgress,
56
57
  ) -> None:
57
58
  src_size = entry.stat().st_size
58
59
 
@@ -80,8 +81,8 @@ def compare_one_file(
80
81
  compare_result.file_size_missmatch += 1
81
82
  return
82
83
 
83
- src_hash = hash_file(src_path)
84
- dst_hash = hash_file(dst_path)
84
+ src_hash = hash_file(src_path, progress=progress, total_size=src_size)
85
+ dst_hash = hash_file(dst_path, progress=progress, total_size=dst_size)
85
86
 
86
87
  if src_hash != dst_hash:
87
88
  logger.warning(
@@ -158,7 +159,11 @@ def compare_tree(
158
159
  with PrintTimingContextManager('Filesystem scan completed in'):
159
160
  src_file_count, src_total_size = humanized_fs_scan(src_root, excludes=excludes)
160
161
 
161
- with DisplayFileTreeProgress(src_file_count, src_total_size) as progress:
162
+ with DisplayFileTreeProgress(
163
+ description=f'Compare {src_root}...',
164
+ total_file_count=src_file_count,
165
+ total_size=src_total_size,
166
+ ) as progress:
162
167
  # init "databases":
163
168
  size_db = FileSizeDatabase(phlb_conf_dir)
164
169
  hash_db = FileHashDatabase(backup_root, phlb_conf_dir)
@@ -175,6 +180,7 @@ def compare_tree(
175
180
  hash_db=hash_db,
176
181
  compare_dir=compare_dir,
177
182
  compare_result=compare_result,
183
+ progress=progress,
178
184
  )
179
185
  except Exception as err:
180
186
  logger.exception(f'Compare {entry.path} {err.__class__.__name__}: {err}')
@@ -184,12 +190,12 @@ def compare_tree(
184
190
  if now >= next_update:
185
191
  progress.update(
186
192
  completed_file_count=compare_result.total_file_count,
187
- completed_size=compare_result.total_size,
193
+ advance_size=compare_result.total_size,
188
194
  )
189
195
  next_update = now + 0.5
190
196
 
191
197
  # Finalize progress indicator values:
192
- progress.update(completed_file_count=compare_result.total_file_count, completed_size=compare_result.total_size)
198
+ progress.update(completed_file_count=compare_result.total_file_count, advance_size=compare_result.total_size)
193
199
 
194
200
  summary_file = compare_main_dir / f'{now_timestamp}-summary.txt'
195
201
  with TeeStdoutContext(summary_file):
@@ -11,6 +11,8 @@ BASE_PATH = Path(PyHardLinkBackup.__file__).parent
11
11
  ##########################################################################
12
12
  # "Settings" for PyHardLinkBackup:
13
13
 
14
- CHUNK_SIZE = 64 * 1024 * 1024 # 64 MB
15
- SMALL_FILE_THRESHOLD = 1000 # bytes
16
14
  HASH_ALGO = 'sha256'
15
+ SMALL_FILE_THRESHOLD = 1000 # bytes
16
+ CHUNK_SIZE = 64 * 1024 * 1024 # 64 MB
17
+ LAGE_FILE_PROGRESS_MIN_SIZE = CHUNK_SIZE * 3
18
+
@@ -41,6 +41,7 @@ def rebuild_one_file(
41
41
  size_db: FileSizeDatabase,
42
42
  hash_db: FileHashDatabase,
43
43
  rebuild_result: RebuildResult,
44
+ progress: DisplayFileTreeProgress,
44
45
  ):
45
46
  file_path = Path(entry.path)
46
47
 
@@ -62,7 +63,7 @@ def rebuild_one_file(
62
63
  # Small files will never deduplicate, skip them
63
64
  return
64
65
 
65
- file_hash = hash_file(file_path)
66
+ file_hash = hash_file(file_path, progress=progress, total_size=size)
66
67
 
67
68
  if size not in size_db:
68
69
  size_db.add(size)
@@ -121,7 +122,11 @@ def rebuild(
121
122
  file_count -= 1
122
123
  total_size -= file.stat().st_size
123
124
 
124
- with DisplayFileTreeProgress(file_count, total_size) as progress:
125
+ with DisplayFileTreeProgress(
126
+ description=f'Rebuild {backup_root}...',
127
+ total_file_count=file_count,
128
+ total_size=total_size,
129
+ ) as progress:
125
130
  # "Databases" for deduplication
126
131
  size_db = FileSizeDatabase(phlb_conf_dir)
127
132
  hash_db = FileHashDatabase(backup_root, phlb_conf_dir)
@@ -137,6 +142,7 @@ def rebuild(
137
142
  size_db=size_db,
138
143
  hash_db=hash_db,
139
144
  rebuild_result=rebuild_result,
145
+ progress=progress,
140
146
  )
141
147
  except Exception as err:
142
148
  logger.exception(f'Backup {entry.path} {err.__class__.__name__}: {err}')
@@ -145,12 +151,12 @@ def rebuild(
145
151
  now = time.monotonic()
146
152
  if now >= next_update:
147
153
  progress.update(
148
- completed_file_count=rebuild_result.process_count, completed_size=rebuild_result.process_size
154
+ completed_file_count=rebuild_result.process_count, advance_size=rebuild_result.process_size
149
155
  )
150
156
  next_update = now + 0.5
151
157
 
152
158
  # Finalize progress indicator values:
153
- progress.update(completed_file_count=rebuild_result.process_count, completed_size=rebuild_result.process_size)
159
+ progress.update(completed_file_count=rebuild_result.process_count, advance_size=rebuild_result.process_size)
154
160
 
155
161
  summary_file = backup_root / f'{timestamp}-rebuild-summary.txt'
156
162
  with TeeStdoutContext(summary_file):