PyHardLinkBackup 1.5.0__tar.gz → 1.7.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (65) hide show
  1. {pyhardlinkbackup-1.5.0 → pyhardlinkbackup-1.7.0}/PKG-INFO +83 -19
  2. {pyhardlinkbackup-1.5.0 → pyhardlinkbackup-1.7.0}/PyHardLinkBackup/__init__.py +1 -1
  3. {pyhardlinkbackup-1.5.0 → pyhardlinkbackup-1.7.0}/PyHardLinkBackup/backup.py +66 -58
  4. {pyhardlinkbackup-1.5.0 → pyhardlinkbackup-1.7.0}/PyHardLinkBackup/compare_backup.py +13 -6
  5. {pyhardlinkbackup-1.5.0 → pyhardlinkbackup-1.7.0}/PyHardLinkBackup/constants.py +4 -2
  6. {pyhardlinkbackup-1.5.0 → pyhardlinkbackup-1.7.0}/PyHardLinkBackup/rebuild_databases.py +10 -4
  7. pyhardlinkbackup-1.7.0/PyHardLinkBackup/tests/test_backup.py +935 -0
  8. pyhardlinkbackup-1.7.0/PyHardLinkBackup/tests/test_compare_backup.py +165 -0
  9. {pyhardlinkbackup-1.5.0 → pyhardlinkbackup-1.7.0}/PyHardLinkBackup/tests/test_rebuild_database.py +3 -5
  10. {pyhardlinkbackup-1.5.0 → pyhardlinkbackup-1.7.0}/PyHardLinkBackup/utilities/filesystem.py +42 -11
  11. pyhardlinkbackup-1.7.0/PyHardLinkBackup/utilities/rich_utils.py +248 -0
  12. {pyhardlinkbackup-1.5.0 → pyhardlinkbackup-1.7.0}/PyHardLinkBackup/utilities/tests/test_filesystem.py +11 -13
  13. pyhardlinkbackup-1.7.0/PyHardLinkBackup/utilities/tests/unittest_utilities.py +78 -0
  14. {pyhardlinkbackup-1.5.0 → pyhardlinkbackup-1.7.0}/README.md +82 -18
  15. {pyhardlinkbackup-1.5.0 → pyhardlinkbackup-1.7.0}/docs/README.md +2 -2
  16. pyhardlinkbackup-1.5.0/PyHardLinkBackup/tests/test_backup.py +0 -628
  17. pyhardlinkbackup-1.5.0/PyHardLinkBackup/tests/test_compare_backup.py +0 -86
  18. pyhardlinkbackup-1.5.0/PyHardLinkBackup/utilities/rich_utils.py +0 -99
  19. {pyhardlinkbackup-1.5.0 → pyhardlinkbackup-1.7.0}/.editorconfig +0 -0
  20. {pyhardlinkbackup-1.5.0 → pyhardlinkbackup-1.7.0}/.github/workflows/tests.yml +0 -0
  21. {pyhardlinkbackup-1.5.0 → pyhardlinkbackup-1.7.0}/.gitignore +0 -0
  22. {pyhardlinkbackup-1.5.0 → pyhardlinkbackup-1.7.0}/.idea/.gitignore +0 -0
  23. {pyhardlinkbackup-1.5.0 → pyhardlinkbackup-1.7.0}/.pre-commit-config.yaml +0 -0
  24. {pyhardlinkbackup-1.5.0 → pyhardlinkbackup-1.7.0}/.pre-commit-hooks.yaml +0 -0
  25. {pyhardlinkbackup-1.5.0 → pyhardlinkbackup-1.7.0}/.run/Template Python tests.run.xml +0 -0
  26. {pyhardlinkbackup-1.5.0 → pyhardlinkbackup-1.7.0}/.run/Unittests - __all__.run.xml +0 -0
  27. {pyhardlinkbackup-1.5.0 → pyhardlinkbackup-1.7.0}/.run/cli.py --help.run.xml +0 -0
  28. {pyhardlinkbackup-1.5.0 → pyhardlinkbackup-1.7.0}/.run/dev-cli update.run.xml +0 -0
  29. {pyhardlinkbackup-1.5.0 → pyhardlinkbackup-1.7.0}/.run/only DocTests.run.xml +0 -0
  30. {pyhardlinkbackup-1.5.0 → pyhardlinkbackup-1.7.0}/.run/only DocWrite.run.xml +0 -0
  31. {pyhardlinkbackup-1.5.0 → pyhardlinkbackup-1.7.0}/.venv-app/lib/python3.12/site-packages/cli_base/tests/shell_complete_snapshots/.gitignore +0 -0
  32. {pyhardlinkbackup-1.5.0 → pyhardlinkbackup-1.7.0}/PyHardLinkBackup/__main__.py +0 -0
  33. {pyhardlinkbackup-1.5.0 → pyhardlinkbackup-1.7.0}/PyHardLinkBackup/cli_app/__init__.py +0 -0
  34. {pyhardlinkbackup-1.5.0 → pyhardlinkbackup-1.7.0}/PyHardLinkBackup/cli_app/phlb.py +0 -0
  35. {pyhardlinkbackup-1.5.0 → pyhardlinkbackup-1.7.0}/PyHardLinkBackup/cli_dev/__init__.py +0 -0
  36. {pyhardlinkbackup-1.5.0 → pyhardlinkbackup-1.7.0}/PyHardLinkBackup/cli_dev/benchmark.py +0 -0
  37. {pyhardlinkbackup-1.5.0 → pyhardlinkbackup-1.7.0}/PyHardLinkBackup/cli_dev/code_style.py +0 -0
  38. {pyhardlinkbackup-1.5.0 → pyhardlinkbackup-1.7.0}/PyHardLinkBackup/cli_dev/packaging.py +0 -0
  39. {pyhardlinkbackup-1.5.0 → pyhardlinkbackup-1.7.0}/PyHardLinkBackup/cli_dev/shell_completion.py +0 -0
  40. {pyhardlinkbackup-1.5.0 → pyhardlinkbackup-1.7.0}/PyHardLinkBackup/cli_dev/testing.py +0 -0
  41. {pyhardlinkbackup-1.5.0 → pyhardlinkbackup-1.7.0}/PyHardLinkBackup/cli_dev/update_readme_history.py +0 -0
  42. {pyhardlinkbackup-1.5.0 → pyhardlinkbackup-1.7.0}/PyHardLinkBackup/logging_setup.py +0 -0
  43. {pyhardlinkbackup-1.5.0 → pyhardlinkbackup-1.7.0}/PyHardLinkBackup/tests/__init__.py +0 -0
  44. {pyhardlinkbackup-1.5.0 → pyhardlinkbackup-1.7.0}/PyHardLinkBackup/tests/test_doc_write.py +0 -0
  45. {pyhardlinkbackup-1.5.0 → pyhardlinkbackup-1.7.0}/PyHardLinkBackup/tests/test_doctests.py +0 -0
  46. {pyhardlinkbackup-1.5.0 → pyhardlinkbackup-1.7.0}/PyHardLinkBackup/tests/test_project_setup.py +0 -0
  47. {pyhardlinkbackup-1.5.0 → pyhardlinkbackup-1.7.0}/PyHardLinkBackup/tests/test_readme.py +0 -0
  48. {pyhardlinkbackup-1.5.0 → pyhardlinkbackup-1.7.0}/PyHardLinkBackup/tests/test_readme_history.py +0 -0
  49. {pyhardlinkbackup-1.5.0 → pyhardlinkbackup-1.7.0}/PyHardLinkBackup/utilities/__init__.py +0 -0
  50. {pyhardlinkbackup-1.5.0 → pyhardlinkbackup-1.7.0}/PyHardLinkBackup/utilities/file_hash_database.py +0 -0
  51. {pyhardlinkbackup-1.5.0 → pyhardlinkbackup-1.7.0}/PyHardLinkBackup/utilities/file_size_database.py +0 -0
  52. {pyhardlinkbackup-1.5.0 → pyhardlinkbackup-1.7.0}/PyHardLinkBackup/utilities/humanize.py +0 -0
  53. {pyhardlinkbackup-1.5.0 → pyhardlinkbackup-1.7.0}/PyHardLinkBackup/utilities/sha256sums.py +0 -0
  54. {pyhardlinkbackup-1.5.0 → pyhardlinkbackup-1.7.0}/PyHardLinkBackup/utilities/tee.py +0 -0
  55. {pyhardlinkbackup-1.5.0 → pyhardlinkbackup-1.7.0}/PyHardLinkBackup/utilities/tests/__init__.py +0 -0
  56. {pyhardlinkbackup-1.5.0 → pyhardlinkbackup-1.7.0}/PyHardLinkBackup/utilities/tests/test_file_hash_database.py +0 -0
  57. {pyhardlinkbackup-1.5.0 → pyhardlinkbackup-1.7.0}/PyHardLinkBackup/utilities/tests/test_file_size_database.py +0 -0
  58. {pyhardlinkbackup-1.5.0 → pyhardlinkbackup-1.7.0}/PyHardLinkBackup/utilities/tyro_cli_shared_args.py +0 -0
  59. {pyhardlinkbackup-1.5.0 → pyhardlinkbackup-1.7.0}/cli.py +0 -0
  60. {pyhardlinkbackup-1.5.0 → pyhardlinkbackup-1.7.0}/dev-cli.py +0 -0
  61. {pyhardlinkbackup-1.5.0 → pyhardlinkbackup-1.7.0}/dist/.gitignore +0 -0
  62. {pyhardlinkbackup-1.5.0 → pyhardlinkbackup-1.7.0}/docs/about-docs.md +0 -0
  63. {pyhardlinkbackup-1.5.0 → pyhardlinkbackup-1.7.0}/noxfile.py +0 -0
  64. {pyhardlinkbackup-1.5.0 → pyhardlinkbackup-1.7.0}/pyproject.toml +0 -0
  65. {pyhardlinkbackup-1.5.0 → pyhardlinkbackup-1.7.0}/uv.lock +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: PyHardLinkBackup
3
- Version: 1.5.0
3
+ Version: 1.7.0
4
4
  Summary: HardLink/Deduplication Backups with Python
5
5
  Project-URL: Documentation, https://github.com/jedie/PyHardLinkBackup
6
6
  Project-URL: Source, https://github.com/jedie/PyHardLinkBackup
@@ -21,11 +21,20 @@ Description-Content-Type: text/markdown
21
21
  [![Python Versions](https://img.shields.io/pypi/pyversions/PyHardLinkBackup)](https://github.com/jedie/PyHardLinkBackup/blob/main/pyproject.toml)
22
22
  [![License GPL-3.0-or-later](https://img.shields.io/pypi/l/PyHardLinkBackup)](https://github.com/jedie/PyHardLinkBackup/blob/main/LICENSE)
23
23
 
24
- HardLink/Deduplication Backups with Python
24
+ PyHardLinkBackup is a cross-platform backup tool designed for efficient, reliable, and accessible backups.
25
+ Similar to `rsync --link-dest`, but with global deduplication across all backups and all paths, not just between two directories.
25
26
 
26
- **WIP:** v1.0.0 is a complete rewrite of PyHardLinkBackup.
27
+ Some aspects:
27
28
 
28
- It's similar to `rsync --link-dest` but the deduplication is done globally for all backups and all paths.
29
+ - Creates deduplicated, versioned backups using hardlinks, minimizing storage usage by linking identical files across all backup snapshots.
30
+ - Employs a global deduplication database (by file size and SHA256 hash) per backup root, ensuring that duplicate files are detected and hardlinked even if they are moved or renamed between backups.
31
+ - Backups are stored as regular files and directories—no proprietary formats—so you can access your data directly without special tools.
32
+ - Deleting old snapshots does not affect the integrity of remaining backups.
33
+ - Linux and macOS are fully supported (Windows support is experimental)
34
+
35
+ Limitations:
36
+
37
+ - Requires a filesystem that supports hardlinks (e.g., btrfs, zfs, ext4, APFS, NTFS with limitations).
29
38
 
30
39
  ## installation
31
40
 
@@ -38,7 +47,16 @@ pipx install PyHardLinkBackup
38
47
  ```
39
48
 
40
49
  After this you can call the CLI via `phlb` command.
41
- The main command is `phlb backup <source> <destination>`:
50
+ The main command is `phlb backup <source> <destination>` to create a backup.
51
+
52
+ e.g.:
53
+
54
+ ```bash
55
+ phlb backup /path/to/source /path/to/destination
56
+ ```
57
+
58
+ This will create a snapshot in `/path/to/destination` using hard links for deduplication. You can safely delete old snapshots without affecting others.
59
+
42
60
 
43
61
  [comment]: <> (✂✂✂ auto generated backup help start ✂✂✂)
44
62
  ```
@@ -63,20 +81,59 @@ Backup the source directory to the destination directory using hard links for de
63
81
  [comment]: <> (✂✂✂ auto generated backup help end ✂✂✂)
64
82
 
65
83
 
84
+ ## Screenshots
85
+ ### Screenshot - running a backup
86
+
87
+ ----
88
+
89
+ ![2026-01-19_phlb1.png](https://raw.githubusercontent.com/jedie/jedie.github.io/main/screenshots/PyHardLinkBackup/2026-01-19_phlb1.png "2026-01-19_phlb1.png")
90
+
91
+ ----
66
92
 
67
- Running a backup looks like:
93
+ ### Screenshot - backup finished
68
94
 
69
- ![2026-01-15-phlb1.png](https://raw.githubusercontent.com/jedie/jedie.github.io/main/screenshots/PyHardLinkBackup/2026-01-15-phlb1.png "2026-01-15-phlb1.png")
95
+ ----
70
96
 
97
+ ![2026-01-19_phlb2.png](https://raw.githubusercontent.com/jedie/jedie.github.io/main/screenshots/PyHardLinkBackup/2026-01-19_phlb2.png "2026-01-19_phlb2.png")
71
98
 
99
+ ----
100
+
101
+ (more screenshots here: [jedie.github.io/tree/main/screenshots/PyHardLinkBackup](https://github.com/jedie/jedie.github.io/tree/main/screenshots/PyHardLinkBackup))
102
+
103
+
104
+ ### update
105
+
106
+ If you use pipx, just call:
107
+ ```bash
108
+ pipx upgrade PyHardLinkBackup
109
+ ```
110
+ see: https://pipx.pypa.io/stable/docs/#pipx-upgrade
72
111
 
73
- If it's finished it display a summary:
74
112
 
75
- ![2026-01-15-phlb2.png](https://raw.githubusercontent.com/jedie/jedie.github.io/main/screenshots/PyHardLinkBackup/2026-01-15-phlb2.png "2026-01-15-phlb2.png")
113
+ ### Troubleshooting
76
114
 
115
+ - **Permission Errors:** Ensure you have read access to source and write access to destination.
116
+ - **Hardlink Limits:** Some filesystems (e.g., NTFS) have limits on the number of hardlinks per file.
117
+ - **Symlink Handling:** Broken symlinks are handled gracefully; see logs for details.
118
+ - **Backup Deletion:** Deleting a snapshot does not affect deduplication of other backups.
119
+ - **Log Files:** Check the log file in each backup directory for error details.
77
120
 
78
121
 
79
- complete help for main CLI app:
122
+ To lower the priority of the backup process (useful to reduce system impact during heavy backups), you can use `nice` and `ionice` on Linux systems:
123
+
124
+ ```bash
125
+ nice -n 19 ionice -c3 phlb backup /path/to/source /path/to/destination
126
+ ```
127
+ - `nice -n 19` sets the lowest CPU priority.
128
+ - `ionice -c3` sets the lowest I/O priority (idle class).
129
+
130
+ Adjust priority of an already running backup:
131
+ ```bash
132
+ renice 19 -p $(pgrep phlb) && ionice -c3 -p $(pgrep phlb)
133
+ ```
134
+
135
+
136
+ ### complete help for main CLI app
80
137
 
81
138
  [comment]: <> (✂✂✂ auto generated main help start ✂✂✂)
82
139
  ```
@@ -99,13 +156,7 @@ usage: phlb [-h] {backup,compare,rebuild,version}
99
156
  [comment]: <> (✂✂✂ auto generated main help end ✂✂✂)
100
157
 
101
158
 
102
- ### update
103
159
 
104
- If you use pipx, just call:
105
- ```bash
106
- pipx upgrade PyHardLinkBackup
107
- ```
108
- see: https://pipx.pypa.io/stable/docs/#pipx-upgrade
109
160
 
110
161
 
111
162
  ## concept
@@ -232,10 +283,26 @@ Overview of main changes:
232
283
 
233
284
  [comment]: <> (✂✂✂ auto generated history start ✂✂✂)
234
285
 
286
+ * [v1.7.0](https://github.com/jedie/PyHardLinkBackup/compare/v1.6.0...v1.7.0)
287
+ * 2026-01-19 - Speedup and enhance unittest
288
+ * 2026-01-17 - Remove unfinished copied files on errors
289
+ * 2026-01-17 - Display/update progress on very large files #75 and enhance all bars
290
+ * 2026-01-18 - Expand tests: Check file open calls
291
+ * 2026-01-17 - expand tests
292
+ * 2026-01-17 - simplify tests
293
+ * 2026-01-17 - Warn if broken symlink found
294
+ * 2026-01-17 - Update README
295
+ * [v1.6.0](https://github.com/jedie/PyHardLinkBackup/compare/v1.5.0...v1.6.0)
296
+ * 2026-01-17 - Fix flaky test, because of terminal size
297
+ * 2026-01-17 - Bugfix: Don't hash new large files twice
298
+ * 2026-01-17 - Use compare also in backup tests
235
299
  * [v1.5.0](https://github.com/jedie/PyHardLinkBackup/compare/v1.4.1...v1.5.0)
236
300
  * 2026-01-17 - NEW: Compare command to verify source tree with last backup
237
301
  * [v1.4.1](https://github.com/jedie/PyHardLinkBackup/compare/v1.4.0...v1.4.1)
238
302
  * 2026-01-16 - Bugfix large file handling
303
+
304
+ <details><summary>Expand older history entries ...</summary>
305
+
239
306
  * [v1.4.0](https://github.com/jedie/PyHardLinkBackup/compare/v1.3.0...v1.4.0)
240
307
  * 2026-01-16 - Create log file in backup and a summary.txt
241
308
  * 2026-01-16 - Run CI tests on macos, too.
@@ -246,9 +313,6 @@ Overview of main changes:
246
313
  * 2026-01-15 - Add tests for rebuild
247
314
* 2026-01-15 - Add command to "rebuild" the size and hash filesystem database
248
315
  * 2026-01-15 - Add screenshots in the README
249
-
250
- <details><summary>Expand older history entries ...</summary>
251
-
252
316
  * [v1.2.0](https://github.com/jedie/PyHardLinkBackup/compare/v1.1.0...v1.2.0)
253
317
  * 2026-01-15 - Add error handling: Log exception but continue with the backup
254
318
* 2026-01-15 - Check permission and hardlink support on destination path
@@ -3,5 +3,5 @@
3
3
  """
4
4
 
5
5
  # See https://packaging.python.org/en/latest/specifications/version-specifiers/
6
- __version__ = '1.5.0'
6
+ __version__ = '1.7.0'
7
7
  __author__ = 'Jens Diemer <PyHardLinkBackup@jensdiemer.de>'
@@ -14,6 +14,7 @@ from PyHardLinkBackup.logging_setup import LoggingManager
14
14
  from PyHardLinkBackup.utilities.file_hash_database import FileHashDatabase
15
15
  from PyHardLinkBackup.utilities.file_size_database import FileSizeDatabase
16
16
  from PyHardLinkBackup.utilities.filesystem import (
17
+ RemoveFileOnError,
17
18
  copy_and_hash,
18
19
  hash_file,
19
20
  humanized_fs_scan,
@@ -59,6 +60,7 @@ def backup_one_file(
59
60
  hash_db: FileHashDatabase,
60
61
  backup_dir: Path,
61
62
  backup_result: BackupResult,
63
+ progress: DisplayFileTreeProgress,
62
64
  ) -> None:
63
65
  backup_result.backup_count += 1
64
66
  src_path = Path(entry.path)
@@ -70,8 +72,8 @@ def backup_one_file(
70
72
 
71
73
  try:
72
74
  size = entry.stat().st_size
73
- except FileNotFoundError:
74
- # e.g.: Handle broken symlink
75
+ except FileNotFoundError as err:
76
+ logger.warning(f'Broken symlink {src_path}: {err.__class__.__name__}: {err}')
75
77
  target = os.readlink(src_path)
76
78
  dst_path.symlink_to(target)
77
79
  backup_result.symlink_files += 1
@@ -95,65 +97,66 @@ def backup_one_file(
95
97
  # Process regular files
96
98
  assert entry.is_file(follow_symlinks=False), f'Unexpected non-file: {src_path}'
97
99
 
98
- # Deduplication logic
100
+ with RemoveFileOnError(dst_path):
101
+ # Deduplication logic
102
+
103
+ if size < size_db.MIN_SIZE:
104
+ # Small file -> always copy without deduplication
105
+ logger.info('Copy small file: %s to %s', src_path, dst_path)
106
+ file_hash = copy_and_hash(src_path, dst_path, progress=progress, total_size=size)
107
+ backup_result.copied_files += 1
108
+ backup_result.copied_size += size
109
+ backup_result.copied_small_files += 1
110
+ backup_result.copied_small_size += size
111
+ store_hash(dst_path, file_hash)
112
+ return
113
+
114
+ if size in size_db:
115
+ logger.debug('File with size %iBytes found before -> hash: %s', size, src_path)
116
+
117
+ if size <= CHUNK_SIZE:
118
+ # File can be read complete into memory
119
+ logger.debug('File size %iBytes <= CHUNK_SIZE (%iBytes) -> read complete into memory', size, CHUNK_SIZE)
120
+ file_content, file_hash = read_and_hash_file(src_path)
121
+ if existing_path := hash_db.get(file_hash):
122
+ logger.info('Hardlink duplicate file: %s to %s', dst_path, existing_path)
123
+ os.link(existing_path, dst_path)
124
+ backup_result.hardlinked_files += 1
125
+ backup_result.hardlinked_size += size
126
+ else:
127
+ logger.info('Store unique file: %s to %s', src_path, dst_path)
128
+ dst_path.write_bytes(file_content)
129
+ hash_db[file_hash] = dst_path
130
+ backup_result.copied_files += 1
131
+ backup_result.copied_size += size
99
132
 
100
- if size < size_db.MIN_SIZE:
101
- # Small file -> always copy without deduplication
102
- logger.info('Copy small file: %s to %s', src_path, dst_path)
103
- file_hash = copy_and_hash(src_path, dst_path)
104
- backup_result.copied_files += 1
105
- backup_result.copied_size += size
106
- backup_result.copied_small_files += 1
107
- backup_result.copied_small_size += size
108
- store_hash(dst_path, file_hash)
109
- return
110
-
111
- if size in size_db:
112
- logger.debug('File with size %iBytes found before -> hash: %s', size, src_path)
113
-
114
- if size <= CHUNK_SIZE:
115
- # File can be read complete into memory
116
- logger.debug('File size %iBytes <= CHUNK_SIZE (%iBytes) -> read complete into memory', size, CHUNK_SIZE)
117
- file_content, file_hash = read_and_hash_file(src_path)
118
- if existing_path := hash_db.get(file_hash):
119
- logger.info('Hardlink duplicate file: %s to %s', dst_path, existing_path)
120
- os.link(existing_path, dst_path)
121
- backup_result.hardlinked_files += 1
122
- backup_result.hardlinked_size += size
123
133
  else:
124
- logger.info('Store unique file: %s to %s', src_path, dst_path)
125
- dst_path.write_bytes(file_content)
126
- hash_db[file_hash] = dst_path
127
- backup_result.copied_files += 1
128
- backup_result.copied_size += size
129
-
134
+ # Large file
135
+ file_hash = hash_file(src_path, progress=progress, total_size=size) # Calculate hash without copying
136
+
137
+ if existing_path := hash_db.get(file_hash):
138
+ logger.info('Hardlink duplicate file: %s to %s', dst_path, existing_path)
139
+ os.link(existing_path, dst_path)
140
+ backup_result.hardlinked_files += 1
141
+ backup_result.hardlinked_size += size
142
+ else:
143
+ logger.info('Copy unique file: %s to %s', src_path, dst_path)
144
+ shutil.copyfile(src_path, dst_path)
145
+ hash_db[file_hash] = dst_path
146
+ backup_result.copied_files += 1
147
+ backup_result.copied_size += size
148
+
149
+ # Keep original file metadata (permission bits, time stamps, and flags)
150
+ shutil.copystat(src_path, dst_path)
130
151
  else:
131
- # Large file
132
- file_hash = hash_file(src_path) # Calculate hash without copying
133
-
134
- if existing_path := hash_db.get(file_hash):
135
- logger.info('Hardlink duplicate file: %s to %s', dst_path, existing_path)
136
- os.link(existing_path, dst_path)
137
- backup_result.hardlinked_files += 1
138
- backup_result.hardlinked_size += size
139
- else:
140
- logger.info('Copy unique file: %s to %s', src_path, dst_path)
141
- file_hash = copy_and_hash(src_path, dst_path)
142
- hash_db[file_hash] = dst_path
143
- backup_result.copied_files += 1
144
- backup_result.copied_size += size
145
-
146
- # Keep original file metadata (permission bits, time stamps, and flags)
147
- shutil.copystat(src_path, dst_path)
148
- else:
149
- # A file with this size not backuped before -> Can't be duplicate -> copy and hash
150
- file_hash = copy_and_hash(src_path, dst_path)
151
- size_db.add(size)
152
- hash_db[file_hash] = dst_path
153
- backup_result.copied_files += 1
154
- backup_result.copied_size += size
152
+ # A file with this size not backuped before -> Can't be duplicate -> copy and hash
153
+ file_hash = copy_and_hash(src_path, dst_path, progress=progress, total_size=size)
154
+ size_db.add(size)
155
+ hash_db[file_hash] = dst_path
156
+ backup_result.copied_files += 1
157
+ backup_result.copied_size += size
155
158
 
156
- store_hash(dst_path, file_hash)
159
+ store_hash(dst_path, file_hash)
157
160
 
158
161
 
159
162
  def backup_tree(
@@ -205,7 +208,11 @@ def backup_tree(
205
208
 
206
209
  print(f'\nBackup to {backup_dir}...\n')
207
210
 
208
- with DisplayFileTreeProgress(src_file_count, src_total_size) as progress:
211
+ with DisplayFileTreeProgress(
212
+ description=f'Backup {src_root}...',
213
+ total_file_count=src_file_count,
214
+ total_size=src_total_size,
215
+ ) as progress:
209
216
  # "Databases" for deduplication
210
217
  size_db = FileSizeDatabase(phlb_conf_dir)
211
218
  hash_db = FileHashDatabase(backup_root, phlb_conf_dir)
@@ -222,6 +229,7 @@ def backup_tree(
222
229
  hash_db=hash_db,
223
230
  backup_dir=backup_dir,
224
231
  backup_result=backup_result,
232
+ progress=progress,
225
233
  )
226
234
  except Exception as err:
227
235
  logger.exception(f'Backup {entry.path} {err.__class__.__name__}: {err}')
@@ -26,6 +26,7 @@ logger = logging.getLogger(__name__)
26
26
 
27
27
  @dataclasses.dataclass
28
28
  class CompareResult:
29
+ last_timestamp: str
29
30
  compare_dir: Path
30
31
  log_file: Path
31
32
  #
@@ -52,6 +53,7 @@ def compare_one_file(
52
53
  hash_db: FileHashDatabase,
53
54
  compare_dir: Path,
54
55
  compare_result: CompareResult,
56
+ progress: DisplayFileTreeProgress,
55
57
  ) -> None:
56
58
  src_size = entry.stat().st_size
57
59
 
@@ -79,8 +81,8 @@ def compare_one_file(
79
81
  compare_result.file_size_missmatch += 1
80
82
  return
81
83
 
82
- src_hash = hash_file(src_path)
83
- dst_hash = hash_file(dst_path)
84
+ src_hash = hash_file(src_path, progress=progress, total_size=src_size)
85
+ dst_hash = hash_file(dst_path, progress=progress, total_size=dst_size)
84
86
 
85
87
  if src_hash != dst_hash:
86
88
  logger.warning(
@@ -157,12 +159,16 @@ def compare_tree(
157
159
  with PrintTimingContextManager('Filesystem scan completed in'):
158
160
  src_file_count, src_total_size = humanized_fs_scan(src_root, excludes=excludes)
159
161
 
160
- with DisplayFileTreeProgress(src_file_count, src_total_size) as progress:
162
+ with DisplayFileTreeProgress(
163
+ description=f'Compare {src_root}...',
164
+ total_file_count=src_file_count,
165
+ total_size=src_total_size,
166
+ ) as progress:
161
167
  # init "databases":
162
168
  size_db = FileSizeDatabase(phlb_conf_dir)
163
169
  hash_db = FileHashDatabase(backup_root, phlb_conf_dir)
164
170
 
165
- compare_result = CompareResult(compare_dir=compare_dir, log_file=log_file)
171
+ compare_result = CompareResult(last_timestamp=last_timestamp, compare_dir=compare_dir, log_file=log_file)
166
172
 
167
173
  next_update = 0
168
174
  for entry in iter_scandir_files(src_root, excludes=excludes):
@@ -174,6 +180,7 @@ def compare_tree(
174
180
  hash_db=hash_db,
175
181
  compare_dir=compare_dir,
176
182
  compare_result=compare_result,
183
+ progress=progress,
177
184
  )
178
185
  except Exception as err:
179
186
  logger.exception(f'Compare {entry.path} {err.__class__.__name__}: {err}')
@@ -183,12 +190,12 @@ def compare_tree(
183
190
  if now >= next_update:
184
191
  progress.update(
185
192
  completed_file_count=compare_result.total_file_count,
186
- completed_size=compare_result.total_size,
193
+ advance_size=compare_result.total_size,
187
194
  )
188
195
  next_update = now + 0.5
189
196
 
190
197
  # Finalize progress indicator values:
191
- progress.update(completed_file_count=compare_result.total_file_count, completed_size=compare_result.total_size)
198
+ progress.update(completed_file_count=compare_result.total_file_count, advance_size=compare_result.total_size)
192
199
 
193
200
  summary_file = compare_main_dir / f'{now_timestamp}-summary.txt'
194
201
  with TeeStdoutContext(summary_file):
@@ -11,6 +11,8 @@ BASE_PATH = Path(PyHardLinkBackup.__file__).parent
11
11
  ##########################################################################
12
12
  # "Settings" for PyHardLinkBackup:
13
13
 
14
- CHUNK_SIZE = 64 * 1024 * 1024 # 64 MB
15
- SMALL_FILE_THRESHOLD = 1000 # bytes
16
14
  HASH_ALGO = 'sha256'
15
+ SMALL_FILE_THRESHOLD = 1000 # bytes
16
+ CHUNK_SIZE = 64 * 1024 * 1024 # 64 MB
17
+ LAGE_FILE_PROGRESS_MIN_SIZE = CHUNK_SIZE * 3
18
+
@@ -41,6 +41,7 @@ def rebuild_one_file(
41
41
  size_db: FileSizeDatabase,
42
42
  hash_db: FileHashDatabase,
43
43
  rebuild_result: RebuildResult,
44
+ progress: DisplayFileTreeProgress,
44
45
  ):
45
46
  file_path = Path(entry.path)
46
47
 
@@ -62,7 +63,7 @@ def rebuild_one_file(
62
63
  # Small files will never deduplicate, skip them
63
64
  return
64
65
 
65
- file_hash = hash_file(file_path)
66
+ file_hash = hash_file(file_path, progress=progress, total_size=size)
66
67
 
67
68
  if size not in size_db:
68
69
  size_db.add(size)
@@ -121,7 +122,11 @@ def rebuild(
121
122
  file_count -= 1
122
123
  total_size -= file.stat().st_size
123
124
 
124
- with DisplayFileTreeProgress(file_count, total_size) as progress:
125
+ with DisplayFileTreeProgress(
126
+ description=f'Rebuild {backup_root}...',
127
+ total_file_count=file_count,
128
+ total_size=total_size,
129
+ ) as progress:
125
130
  # "Databases" for deduplication
126
131
  size_db = FileSizeDatabase(phlb_conf_dir)
127
132
  hash_db = FileHashDatabase(backup_root, phlb_conf_dir)
@@ -137,6 +142,7 @@ def rebuild(
137
142
  size_db=size_db,
138
143
  hash_db=hash_db,
139
144
  rebuild_result=rebuild_result,
145
+ progress=progress,
140
146
  )
141
147
  except Exception as err:
142
148
  logger.exception(f'Backup {entry.path} {err.__class__.__name__}: {err}')
@@ -145,12 +151,12 @@ def rebuild(
145
151
  now = time.monotonic()
146
152
  if now >= next_update:
147
153
  progress.update(
148
- completed_file_count=rebuild_result.process_count, completed_size=rebuild_result.process_size
154
+ completed_file_count=rebuild_result.process_count, advance_size=rebuild_result.process_size
149
155
  )
150
156
  next_update = now + 0.5
151
157
 
152
158
  # Finalize progress indicator values:
153
- progress.update(completed_file_count=rebuild_result.process_count, completed_size=rebuild_result.process_size)
159
+ progress.update(completed_file_count=rebuild_result.process_count, advance_size=rebuild_result.process_size)
154
160
 
155
161
  summary_file = backup_root / f'{timestamp}-rebuild-summary.txt'
156
162
  with TeeStdoutContext(summary_file):