PyHardLinkBackup 1.2.0__tar.gz → 1.4.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (60) hide show
  1. {pyhardlinkbackup-1.2.0 → pyhardlinkbackup-1.4.0}/.github/workflows/tests.yml +2 -1
  2. {pyhardlinkbackup-1.2.0 → pyhardlinkbackup-1.4.0}/PKG-INFO +54 -27
  3. {pyhardlinkbackup-1.2.0 → pyhardlinkbackup-1.4.0}/PyHardLinkBackup/__init__.py +1 -1
  4. {pyhardlinkbackup-1.2.0 → pyhardlinkbackup-1.4.0}/PyHardLinkBackup/backup.py +52 -44
  5. pyhardlinkbackup-1.4.0/PyHardLinkBackup/cli_app/phlb.py +87 -0
  6. {pyhardlinkbackup-1.2.0 → pyhardlinkbackup-1.4.0}/PyHardLinkBackup/cli_dev/benchmark.py +51 -32
  7. pyhardlinkbackup-1.4.0/PyHardLinkBackup/logging_setup.py +124 -0
  8. pyhardlinkbackup-1.4.0/PyHardLinkBackup/rebuild_databases.py +176 -0
  9. {pyhardlinkbackup-1.2.0 → pyhardlinkbackup-1.4.0}/PyHardLinkBackup/tests/test_backup.py +83 -29
  10. pyhardlinkbackup-1.4.0/PyHardLinkBackup/tests/test_rebuild_database.py +224 -0
  11. {pyhardlinkbackup-1.2.0 → pyhardlinkbackup-1.4.0}/PyHardLinkBackup/utilities/file_hash_database.py +4 -0
  12. {pyhardlinkbackup-1.2.0 → pyhardlinkbackup-1.4.0}/PyHardLinkBackup/utilities/humanize.py +17 -0
  13. {pyhardlinkbackup-1.2.0 → pyhardlinkbackup-1.4.0}/PyHardLinkBackup/utilities/rich_utils.py +8 -8
  14. pyhardlinkbackup-1.4.0/PyHardLinkBackup/utilities/sha256sums.py +61 -0
  15. pyhardlinkbackup-1.4.0/PyHardLinkBackup/utilities/tee.py +40 -0
  16. {pyhardlinkbackup-1.2.0 → pyhardlinkbackup-1.4.0}/PyHardLinkBackup/utilities/tests/test_file_hash_database.py +3 -1
  17. {pyhardlinkbackup-1.2.0 → pyhardlinkbackup-1.4.0}/PyHardLinkBackup/utilities/tests/test_file_size_database.py +1 -1
  18. pyhardlinkbackup-1.4.0/PyHardLinkBackup/utilities/tyro_cli_shared_args.py +12 -0
  19. {pyhardlinkbackup-1.2.0 → pyhardlinkbackup-1.4.0}/README.md +53 -26
  20. {pyhardlinkbackup-1.2.0 → pyhardlinkbackup-1.4.0}/docs/README.md +8 -0
  21. pyhardlinkbackup-1.2.0/PyHardLinkBackup/cli_app/phlb.py +0 -50
  22. {pyhardlinkbackup-1.2.0 → pyhardlinkbackup-1.4.0}/.editorconfig +0 -0
  23. {pyhardlinkbackup-1.2.0 → pyhardlinkbackup-1.4.0}/.gitignore +0 -0
  24. {pyhardlinkbackup-1.2.0 → pyhardlinkbackup-1.4.0}/.idea/.gitignore +0 -0
  25. {pyhardlinkbackup-1.2.0 → pyhardlinkbackup-1.4.0}/.pre-commit-config.yaml +0 -0
  26. {pyhardlinkbackup-1.2.0 → pyhardlinkbackup-1.4.0}/.pre-commit-hooks.yaml +0 -0
  27. {pyhardlinkbackup-1.2.0 → pyhardlinkbackup-1.4.0}/.run/Template Python tests.run.xml +0 -0
  28. {pyhardlinkbackup-1.2.0 → pyhardlinkbackup-1.4.0}/.run/Unittests - __all__.run.xml +0 -0
  29. {pyhardlinkbackup-1.2.0 → pyhardlinkbackup-1.4.0}/.run/cli.py --help.run.xml +0 -0
  30. {pyhardlinkbackup-1.2.0 → pyhardlinkbackup-1.4.0}/.run/dev-cli update.run.xml +0 -0
  31. {pyhardlinkbackup-1.2.0 → pyhardlinkbackup-1.4.0}/.run/only DocTests.run.xml +0 -0
  32. {pyhardlinkbackup-1.2.0 → pyhardlinkbackup-1.4.0}/.run/only DocWrite.run.xml +0 -0
  33. {pyhardlinkbackup-1.2.0 → pyhardlinkbackup-1.4.0}/.venv-app/lib/python3.12/site-packages/cli_base/tests/shell_complete_snapshots/.gitignore +0 -0
  34. {pyhardlinkbackup-1.2.0 → pyhardlinkbackup-1.4.0}/PyHardLinkBackup/__main__.py +0 -0
  35. {pyhardlinkbackup-1.2.0 → pyhardlinkbackup-1.4.0}/PyHardLinkBackup/cli_app/__init__.py +0 -0
  36. {pyhardlinkbackup-1.2.0 → pyhardlinkbackup-1.4.0}/PyHardLinkBackup/cli_dev/__init__.py +0 -0
  37. {pyhardlinkbackup-1.2.0 → pyhardlinkbackup-1.4.0}/PyHardLinkBackup/cli_dev/code_style.py +0 -0
  38. {pyhardlinkbackup-1.2.0 → pyhardlinkbackup-1.4.0}/PyHardLinkBackup/cli_dev/packaging.py +0 -0
  39. {pyhardlinkbackup-1.2.0 → pyhardlinkbackup-1.4.0}/PyHardLinkBackup/cli_dev/shell_completion.py +0 -0
  40. {pyhardlinkbackup-1.2.0 → pyhardlinkbackup-1.4.0}/PyHardLinkBackup/cli_dev/testing.py +0 -0
  41. {pyhardlinkbackup-1.2.0 → pyhardlinkbackup-1.4.0}/PyHardLinkBackup/cli_dev/update_readme_history.py +0 -0
  42. {pyhardlinkbackup-1.2.0 → pyhardlinkbackup-1.4.0}/PyHardLinkBackup/constants.py +0 -0
  43. {pyhardlinkbackup-1.2.0 → pyhardlinkbackup-1.4.0}/PyHardLinkBackup/tests/__init__.py +0 -0
  44. {pyhardlinkbackup-1.2.0 → pyhardlinkbackup-1.4.0}/PyHardLinkBackup/tests/test_doc_write.py +0 -0
  45. {pyhardlinkbackup-1.2.0 → pyhardlinkbackup-1.4.0}/PyHardLinkBackup/tests/test_doctests.py +0 -0
  46. {pyhardlinkbackup-1.2.0 → pyhardlinkbackup-1.4.0}/PyHardLinkBackup/tests/test_project_setup.py +0 -0
  47. {pyhardlinkbackup-1.2.0 → pyhardlinkbackup-1.4.0}/PyHardLinkBackup/tests/test_readme.py +0 -0
  48. {pyhardlinkbackup-1.2.0 → pyhardlinkbackup-1.4.0}/PyHardLinkBackup/tests/test_readme_history.py +0 -0
  49. {pyhardlinkbackup-1.2.0 → pyhardlinkbackup-1.4.0}/PyHardLinkBackup/utilities/__init__.py +0 -0
  50. {pyhardlinkbackup-1.2.0 → pyhardlinkbackup-1.4.0}/PyHardLinkBackup/utilities/file_size_database.py +0 -0
  51. {pyhardlinkbackup-1.2.0 → pyhardlinkbackup-1.4.0}/PyHardLinkBackup/utilities/filesystem.py +0 -0
  52. {pyhardlinkbackup-1.2.0 → pyhardlinkbackup-1.4.0}/PyHardLinkBackup/utilities/tests/__init__.py +0 -0
  53. {pyhardlinkbackup-1.2.0 → pyhardlinkbackup-1.4.0}/PyHardLinkBackup/utilities/tests/test_filesystem.py +0 -0
  54. {pyhardlinkbackup-1.2.0 → pyhardlinkbackup-1.4.0}/cli.py +0 -0
  55. {pyhardlinkbackup-1.2.0 → pyhardlinkbackup-1.4.0}/dev-cli.py +0 -0
  56. {pyhardlinkbackup-1.2.0 → pyhardlinkbackup-1.4.0}/dist/.gitignore +0 -0
  57. {pyhardlinkbackup-1.2.0 → pyhardlinkbackup-1.4.0}/docs/about-docs.md +0 -0
  58. {pyhardlinkbackup-1.2.0 → pyhardlinkbackup-1.4.0}/noxfile.py +0 -0
  59. {pyhardlinkbackup-1.2.0 → pyhardlinkbackup-1.4.0}/pyproject.toml +0 -0
  60. {pyhardlinkbackup-1.2.0 → pyhardlinkbackup-1.4.0}/uv.lock +0 -0
@@ -13,11 +13,12 @@ on:
13
13
 
14
14
  jobs:
15
15
  test:
16
- runs-on: ubuntu-latest
17
16
  strategy:
18
17
  fail-fast: false
19
18
  matrix:
20
19
  python-version: ["3.14", "3.13", "3.12"]
20
+ os: [ubuntu-latest, macos-latest] # TODO: windows-latest
21
+ runs-on: ${{ matrix.os }}
21
22
  steps:
22
23
  - name: Checkout
23
24
  run: |
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: PyHardLinkBackup
3
- Version: 1.2.0
3
+ Version: 1.4.0
4
4
  Summary: HardLink/Deduplication Backups with Python
5
5
  Project-URL: Documentation, https://github.com/jedie/PyHardLinkBackup
6
6
  Project-URL: Source, https://github.com/jedie/PyHardLinkBackup
@@ -42,43 +42,58 @@ The main command is `phlb backup <source> <destination>`:
42
42
 
43
43
  [comment]: <> (✂✂✂ auto generated backup help start ✂✂✂)
44
44
  ```
45
- usage: phlb backup [-h] source destination [--excludes [STR [STR ...]]] [-v]
45
+ usage: phlb backup [-h] [BACKUP OPTIONS]
46
46
 
47
47
  Backup the source directory to the destination directory using hard links for deduplication.
48
48
 
49
- ╭─ positional arguments ───────────────────────────────────────────────────────────────────────────────────────────────╮
50
- │ source Source directory to back up. (required)
51
- │ destination Destination directory for the backup. (required)
52
- ╰──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯
53
- ╭─ options ────────────────────────────────────────────────────────────────────────────────────────────────────────────╮
54
- │ -h, --help show this help message and exit
55
- │ --excludes [STR [STR ...]]
56
- │ List of directory or file names to exclude from backup. (default: __pycache__ .cache .temp .tmp .tox
57
- .nox)
58
- -v, --verbosity
59
- Verbosity level; e.g.: -v, -vv, -vvv, etc. (repeatable)
60
- ╰──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯
49
+ ╭─ positional arguments ──────────────────────────────────────────────────────────────────────────────────────╮
50
+ │ source Source directory to back up. (required)
51
+ │ destination Destination directory for the backup. (required)
52
+ ╰─────────────────────────────────────────────────────────────────────────────────────────────────────────────╯
53
+ ╭─ options ───────────────────────────────────────────────────────────────────────────────────────────────────╮
54
+ │ -h, --help show this help message and exit
55
+ │ --excludes [STR [STR ...]]
56
+ │ List of directories to exclude from backup. (default: __pycache__ .cache .temp .tmp .tox .nox)
57
+ --verbosity {debug,info,warning,error}
58
+ Log level for console logging. (default: warning)
59
+ --log-file-level {debug,info,warning,error}
60
+ │ Log level for the log file (default: info) │
61
+ ╰─────────────────────────────────────────────────────────────────────────────────────────────────────────────╯
61
62
  ```
62
63
  [comment]: <> (✂✂✂ auto generated backup help end ✂✂✂)
63
64
 
64
65
 
65
66
 
67
+ Running a backup looks like:
68
+
69
+ ![2026-01-15-phlb1.png](https://raw.githubusercontent.com/jedie/jedie.github.io/main/screenshots/PyHardLinkBackup/2026-01-15-phlb1.png "2026-01-15-phlb1.png")
70
+
71
+
72
+
73
+ When it's finished, it displays a summary:
74
+
75
+ ![2026-01-15-phlb2.png](https://raw.githubusercontent.com/jedie/jedie.github.io/main/screenshots/PyHardLinkBackup/2026-01-15-phlb2.png "2026-01-15-phlb2.png")
76
+
77
+
78
+
66
79
  complete help for main CLI app:
67
80
 
68
81
  [comment]: <> (✂✂✂ auto generated main help start ✂✂✂)
69
82
  ```
70
- usage: phlb [-h] {backup,version}
83
+ usage: phlb [-h] {backup,rebuild,version}
71
84
 
72
85
 
73
86
 
74
- ╭─ options ─────────────────────────────────────────────────────────────────────────────────────────────────╮
75
- │ -h, --help show this help message and exit
76
- ╰───────────────────────────────────────────────────────────────────────────────────────────────────────────╯
77
- ╭─ subcommands ─────────────────────────────────────────────────────────────────────────────────────────────╮
78
- │ (required)
79
- │ • backup Backup the source directory to the destination directory using hard links for deduplication.
80
- │ • version Print version and exit
81
- ╰───────────────────────────────────────────────────────────────────────────────────────────────────────────╯
87
+ ╭─ options ────────────────────────────────────────────────────────────────────────────────────────────────────────────╮
88
+ │ -h, --help show this help message and exit
89
+ ╰──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯
90
+ ╭─ subcommands ────────────────────────────────────────────────────────────────────────────────────────────────────────╮
91
+ │ (required)
92
+ │ • backup Backup the source directory to the destination directory using hard links for deduplication.
93
+ │ • rebuild Rebuild the file hash and size database by scanning all backup files. And also verify SHA256SUMS and/or
94
+ │ store missing hashes in SHA256SUMS files. │
95
+ │ • version Print version and exit │
96
+ ╰──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯
82
97
  ```
83
98
  [comment]: <> (✂✂✂ auto generated main help end ✂✂✂)
84
99
 
@@ -160,7 +175,7 @@ The file contains only the relative path to the first hardlink of this file cont
160
175
 
161
176
  [comment]: <> (✂✂✂ auto generated dev help start ✂✂✂)
162
177
  ```
163
- usage: ./dev-cli.py [-h] {benchmark-hashes,coverage,install,lint,mypy,nox,pip-audit,publish,shell-completion,test,update,update-readme-history,update-test-snapshot-files,version}
178
+ usage: ./dev-cli.py [-h] {benchmark-hashes,coverage,install,lint,mypy,nox,pip-audit,publish,scan-benchmark,shell-completion,test,update,update-readme-history,update-test-snapshot-files,version}
164
179
 
165
180
 
166
181
 
@@ -178,6 +193,8 @@ usage: ./dev-cli.py [-h] {benchmark-hashes,coverage,install,lint,mypy,nox,pip-au
178
193
  │ • nox Run nox │
179
194
  │ • pip-audit Run pip-audit check against current requirements files │
180
195
  │ • publish Build and upload this project to PyPi │
196
+ │ • scan-benchmark │
197
+ │ Benchmark our filesystem scan routine. │
181
198
  │ • shell-completion │
182
199
  │ Setup shell completion for this CLI (Currently only for bash shell) │
183
200
  │ • test Run unittests │
@@ -214,6 +231,16 @@ Overview of main changes:
214
231
 
215
232
  [comment]: <> (✂✂✂ auto generated history start ✂✂✂)
216
233
 
234
+ * [v1.4.0](https://github.com/jedie/PyHardLinkBackup/compare/v1.3.0...v1.4.0)
235
+ * 2026-01-16 - Create log file in backup and a summary.txt
236
+ * 2026-01-16 - Run CI tests on macos, too.
237
+ * 2026-01-16 - add dev cli command "scan-benchmark"
238
+ * [v1.3.0](https://github.com/jedie/PyHardLinkBackup/compare/v1.2.0...v1.3.0)
239
+ * 2026-01-15 - Verify SHA256SUMS files in "rebuild" command, too.
240
+ * 2026-01-15 - Code cleanup: use more generic names for and in BackupProgress
241
+ * 2026-01-15 - Add tests for rebuild
242
+ * 2026-01-15 - Add command to "rebuild" the size and hash filesystem database
243
+ * 2026-01-15 - Add screenshots in the README
217
244
  * [v1.2.0](https://github.com/jedie/PyHardLinkBackup/compare/v1.1.0...v1.2.0)
218
245
  * 2026-01-15 - Add error handling: Log exception but continue with the backup
219
246
  * 2026-01-15 - Check permission and hardlink support on destination path
@@ -223,6 +250,9 @@ Overview of main changes:
223
250
  * [v1.1.0](https://github.com/jedie/PyHardLinkBackup/compare/v1.0.1...v1.1.0)
224
251
  * 2026-01-14 - Change backup timestamp directory to old schema: '%Y-%m-%d-%H%M%S'
225
252
  * 2026-01-14 - Add "Overview of main changes" to README
253
+
254
+ <details><summary>Expand older history entries ...</summary>
255
+
226
256
  * [v1.0.1](https://github.com/jedie/PyHardLinkBackup/compare/v1.0.0...v1.0.1)
227
257
  * 2026-01-13 - Store SHA256SUMS files in backup directories
228
258
  * [v1.0.0](https://github.com/jedie/PyHardLinkBackup/compare/v0.13.0...v1.0.0)
@@ -235,9 +265,6 @@ Overview of main changes:
235
265
  * 2026-01-13 - Add DocWrite, handle broken symlinks, keep file meta, handle missing hardlink sources
236
266
  * 2026-01-12 - First working iteration with rich progress bar
237
267
  * 2026-01-08 - Rewrite everything
238
-
239
- <details><summary>Expand older history entries ...</summary>
240
-
241
268
  * [v0.13.0](https://github.com/jedie/PyHardLinkBackup/compare/v0.12.3...v0.13.0)
242
269
  * 2020-03-18 - release v0.13.0
243
270
  * 2020-03-17 - deactivate pypy tests in travis, because of SQLite errors, like:
@@ -3,5 +3,5 @@
3
3
  """
4
4
 
5
5
  # See https://packaging.python.org/en/latest/specifications/version-specifiers/
6
- __version__ = '1.2.0'
6
+ __version__ = '1.4.0'
7
7
  __author__ = 'Jens Diemer <PyHardLinkBackup@jensdiemer.de>'
@@ -1,15 +1,16 @@
1
1
  import dataclasses
2
+ import datetime
2
3
  import logging
3
4
  import os
4
5
  import shutil
5
6
  import sys
6
7
  import time
7
- from datetime import datetime
8
8
  from pathlib import Path
9
9
 
10
10
  from rich import print # noqa
11
11
 
12
12
  from PyHardLinkBackup.constants import CHUNK_SIZE
13
+ from PyHardLinkBackup.logging_setup import LoggingManager
13
14
  from PyHardLinkBackup.utilities.file_hash_database import FileHashDatabase
14
15
  from PyHardLinkBackup.utilities.file_size_database import FileSizeDatabase
15
16
  from PyHardLinkBackup.utilities.filesystem import (
@@ -20,8 +21,10 @@ from PyHardLinkBackup.utilities.filesystem import (
20
21
  read_and_hash_file,
21
22
  supports_hardlinks,
22
23
  )
23
- from PyHardLinkBackup.utilities.humanize import human_filesize
24
- from PyHardLinkBackup.utilities.rich_utils import BackupProgress
24
+ from PyHardLinkBackup.utilities.humanize import PrintTimingContextManager, human_filesize
25
+ from PyHardLinkBackup.utilities.rich_utils import DisplayFileTreeProgress
26
+ from PyHardLinkBackup.utilities.sha256sums import store_hash
27
+ from PyHardLinkBackup.utilities.tee import TeeStdoutContext
25
28
 
26
29
 
27
30
  logger = logging.getLogger(__name__)
@@ -30,6 +33,7 @@ logger = logging.getLogger(__name__)
30
33
  @dataclasses.dataclass
31
34
  class BackupResult:
32
35
  backup_dir: Path
36
+ log_file: Path
33
37
  #
34
38
  backup_count: int = 0
35
39
  backup_size: int = 0
@@ -151,23 +155,13 @@ def backup_one_file(
151
155
  store_hash(dst_path, file_hash)
152
156
 
153
157
 
154
- def store_hash(file_path: Path, file_hash: str):
155
- """DocWrite: README.md ## SHA256SUMS
156
- A `SHA256SUMS` file is stored in each backup directory containing the SHA256 hashes of all files in that directory.
157
- It's the same format as e.g.: `sha256sum * > SHA256SUMS` command produces.
158
- So it's possible to verify the integrity of the backup files later.
159
- e.g.:
160
- ```bash
161
- cd .../your/backup/foobar/20240101_120000/
162
- sha256sum -c SHA256SUMS
163
- ```
164
- """
165
- hash_file_path = file_path.parent / 'SHA256SUMS'
166
- with hash_file_path.open('a') as f:
167
- f.write(f'{file_hash} {file_path.name}\n')
168
-
169
-
170
- def backup_tree(*, src_root: Path, backup_root: Path, excludes: set[str]) -> BackupResult:
158
+ def backup_tree(
159
+ *,
160
+ src_root: Path,
161
+ backup_root: Path,
162
+ excludes: tuple[str, ...],
163
+ log_manager: LoggingManager,
164
+ ) -> BackupResult:
171
165
  src_root = src_root.resolve()
172
166
  if not src_root.is_dir():
173
167
  print('Error: Source directory does not exist!')
@@ -191,23 +185,31 @@ def backup_tree(*, src_root: Path, backup_root: Path, excludes: set[str]) -> Bac
191
185
  sys.exit(1)
192
186
 
193
187
  # Step 1: Scan source directory:
194
- src_file_count, src_total_size = humanized_fs_scan(src_root, excludes)
188
+ excludes: set = set(excludes)
189
+ with PrintTimingContextManager('Filesystem scan completed in'):
190
+ src_file_count, src_total_size = humanized_fs_scan(src_root, excludes=excludes)
195
191
 
196
192
  phlb_conf_dir = backup_root / '.phlb'
197
193
  phlb_conf_dir.mkdir(parents=False, exist_ok=True)
198
194
 
199
- backup_dir = backup_root / src_root.name / datetime.now().strftime('%Y-%m-%d-%H%M%S')
200
- logger.info('Backup %s to %s', src_root, backup_dir)
195
+ timestamp = datetime.datetime.now().strftime('%Y-%m-%d-%H%M%S')
196
+ backup_main_dir = backup_root / src_root.name
197
+ backup_dir = backup_main_dir / timestamp
201
198
  backup_dir.mkdir(parents=True, exist_ok=False)
202
199
 
200
+ log_file = backup_main_dir / f'{timestamp}-backup.log'
201
+ log_manager.start_file_logging(log_file)
202
+
203
+ logger.info('Backup %s to %s', src_root, backup_dir)
204
+
203
205
  print(f'\nBackup to {backup_dir}...\n')
204
206
 
205
- with BackupProgress(src_file_count, src_total_size) as progress:
207
+ with DisplayFileTreeProgress(src_file_count, src_total_size) as progress:
206
208
  # "Databases" for deduplication
207
209
  size_db = FileSizeDatabase(phlb_conf_dir)
208
210
  hash_db = FileHashDatabase(backup_root, phlb_conf_dir)
209
211
 
210
- backup_result = BackupResult(backup_dir=backup_dir)
212
+ backup_result = BackupResult(backup_dir=backup_dir, log_file=log_file)
211
213
 
212
214
  next_update = 0
213
215
  for entry in iter_scandir_files(src_root, excludes=excludes):
@@ -226,27 +228,33 @@ def backup_tree(*, src_root: Path, backup_root: Path, excludes: set[str]) -> Bac
226
228
  else:
227
229
  now = time.monotonic()
228
230
  if now >= next_update:
229
- progress.update(backup_count=backup_result.backup_count, backup_size=backup_result.backup_size)
231
+ progress.update(
232
+ completed_file_count=backup_result.backup_count, completed_size=backup_result.backup_size
233
+ )
230
234
  next_update = now + 0.5
231
235
 
232
236
  # Finalize progress indicator values:
233
- progress.update(backup_count=backup_result.backup_count, backup_size=backup_result.backup_size)
234
-
235
- print(f'\nBackup complete: {backup_dir} (total size {human_filesize(backup_result.backup_size)})\n')
236
- print(f' Total files processed: {backup_result.backup_count}')
237
- print(f' * Symlinked files: {backup_result.symlink_files}')
238
- print(
239
- f' * Hardlinked files: {backup_result.hardlinked_files}'
240
- f' (saved {human_filesize(backup_result.hardlinked_size)})'
241
- )
242
- print(f' * Copied files: {backup_result.copied_files} (total {human_filesize(backup_result.copied_size)})')
243
- print(
244
- f' of which small (<{size_db.MIN_SIZE} Bytes)'
245
- f' files: {backup_result.copied_small_files}'
246
- f' (total {human_filesize(backup_result.copied_small_size)})'
247
- )
248
- if backup_result.error_count > 0:
249
- print(f' Errors during backup: {backup_result.error_count} (see log for details)')
250
- print()
237
+ progress.update(completed_file_count=backup_result.backup_count, completed_size=backup_result.backup_size)
238
+
239
+ summary_file = backup_main_dir / f'{timestamp}-summary.txt'
240
+ with TeeStdoutContext(summary_file):
241
+ print(f'\nBackup complete: {backup_dir} (total size {human_filesize(backup_result.backup_size)})\n')
242
+ print(f' Total files processed: {backup_result.backup_count}')
243
+ print(f' * Symlinked files: {backup_result.symlink_files}')
244
+ print(
245
+ f' * Hardlinked files: {backup_result.hardlinked_files}'
246
+ f' (saved {human_filesize(backup_result.hardlinked_size)})'
247
+ )
248
+ print(f' * Copied files: {backup_result.copied_files} (total {human_filesize(backup_result.copied_size)})')
249
+ print(
250
+ f' of which small (<{size_db.MIN_SIZE} Bytes)'
251
+ f' files: {backup_result.copied_small_files}'
252
+ f' (total {human_filesize(backup_result.copied_small_size)})'
253
+ )
254
+ if backup_result.error_count > 0:
255
+ print(f' Errors during backup: {backup_result.error_count} (see log for details)')
256
+ print()
257
+
258
+ logger.info('Backup completed. Summary created: %s', summary_file)
251
259
 
252
260
  return backup_result
@@ -0,0 +1,87 @@
1
+ import logging
2
+ from pathlib import Path
3
+ from typing import Annotated
4
+
5
+ import tyro
6
+ from rich import print # noqa
7
+
8
+ from PyHardLinkBackup import rebuild_databases
9
+ from PyHardLinkBackup.backup import backup_tree
10
+ from PyHardLinkBackup.cli_app import app
11
+ from PyHardLinkBackup.logging_setup import (
12
+ DEFAULT_CONSOLE_LOG_LEVEL,
13
+ DEFAULT_LOG_FILE_LEVEL,
14
+ LoggingManager,
15
+ TyroConsoleLogLevelArgType,
16
+ TyroLogFileLevelArgType,
17
+ )
18
+ from PyHardLinkBackup.utilities.tyro_cli_shared_args import (
19
+ DEFAULT_EXCLUDE_DIRECTORIES,
20
+ TyroExcludeDirectoriesArgType,
21
+ )
22
+
23
+
24
+ logger = logging.getLogger(__name__)
25
+
26
+
27
+ @app.command
28
+ def backup(
29
+ src: Annotated[
30
+ Path,
31
+ tyro.conf.arg(
32
+ metavar='source',
33
+ help='Source directory to back up.',
34
+ ),
35
+ ],
36
+ dst: Annotated[
37
+ Path,
38
+ tyro.conf.arg(
39
+ metavar='destination',
40
+ help='Destination directory for the backup.',
41
+ ),
42
+ ],
43
+ /,
44
+ excludes: TyroExcludeDirectoriesArgType = DEFAULT_EXCLUDE_DIRECTORIES,
45
+ verbosity: TyroConsoleLogLevelArgType = DEFAULT_CONSOLE_LOG_LEVEL,
46
+ log_file_level: TyroLogFileLevelArgType = DEFAULT_LOG_FILE_LEVEL,
47
+ ) -> None:
48
+ """
49
+ Backup the source directory to the destination directory using hard links for deduplication.
50
+ """
51
+ log_manager = LoggingManager(
52
+ console_level=verbosity,
53
+ file_level=log_file_level,
54
+ )
55
+ backup_tree(
56
+ src_root=src,
57
+ backup_root=dst,
58
+ excludes=excludes,
59
+ log_manager=log_manager,
60
+ )
61
+
62
+
63
+ @app.command
64
+ def rebuild(
65
+ backup_root: Annotated[
66
+ Path,
67
+ tyro.conf.arg(
68
+ metavar='backup-directory',
69
+ help='Root directory of the the backups.',
70
+ ),
71
+ ],
72
+ /,
73
+ verbosity: TyroConsoleLogLevelArgType = DEFAULT_CONSOLE_LOG_LEVEL,
74
+ log_file_level: TyroLogFileLevelArgType = DEFAULT_LOG_FILE_LEVEL,
75
+ ) -> None:
76
+ """
77
+ Rebuild the file hash and size database by scanning all backup files. And also verify SHA256SUMS
78
+ and/or store missing hashes in SHA256SUMS files.
79
+ """
80
+ log_manager = LoggingManager(
81
+ console_level=verbosity,
82
+ file_level=log_file_level,
83
+ )
84
+ rebuild_databases.rebuild(
85
+ backup_root=backup_root,
86
+ log_manager=log_manager,
87
+ )
@@ -10,16 +10,34 @@ from cli_base.tyro_commands import TyroVerbosityArgType
10
10
  from rich import print # noqa
11
11
 
12
12
  from PyHardLinkBackup.cli_dev import app
13
- from PyHardLinkBackup.utilities.filesystem import iter_scandir_files
13
+ from PyHardLinkBackup.utilities.filesystem import humanized_fs_scan, iter_scandir_files
14
+ from PyHardLinkBackup.utilities.humanize import PrintTimingContextManager
15
+ from PyHardLinkBackup.utilities.tyro_cli_shared_args import DEFAULT_EXCLUDE_DIRECTORIES, TyroExcludeDirectoriesArgType
14
16
 
15
17
 
16
18
  logger = logging.getLogger(__name__)
17
19
 
18
20
 
21
+ @app.command
22
+ def scan_benchmark(
23
+ base_path: Path,
24
+ /,
25
+ excludes: TyroExcludeDirectoriesArgType = DEFAULT_EXCLUDE_DIRECTORIES,
26
+ verbosity: TyroVerbosityArgType = 1,
27
+ ) -> None:
28
+ """
29
+ Benchmark our filesystem scan routine.
30
+ """
31
+ setup_logging(verbosity=verbosity)
32
+ with PrintTimingContextManager('Filesystem scan completed in'):
33
+ humanized_fs_scan(path=base_path, excludes=set(excludes))
34
+
35
+
19
36
  @app.command
20
37
  def benchmark_hashes(
21
38
  base_path: Path,
22
39
  /,
40
+ excludes: TyroExcludeDirectoriesArgType = DEFAULT_EXCLUDE_DIRECTORIES,
23
41
  max_duration: int = 30, # in seconds
24
42
  min_file_size: int = 15 * 1024, # 15 KiB
25
43
  max_file_size: int = 100 * 1024 * 1024, # 100 MiB
@@ -70,40 +88,41 @@ def benchmark_hashes(
70
88
  stop_time = start_time + max_duration
71
89
  next_update = start_time + 2
72
90
 
73
- for dir_entry in iter_scandir_files(base_path):
74
- entry_stat = dir_entry.stat()
75
- file_size = entry_stat.st_size
76
- if not (min_file_size <= file_size <= max_file_size):
77
- continue
91
+ with PrintTimingContextManager('Filesystem scan completed in'):
92
+ for dir_entry in iter_scandir_files(path=base_path, excludes=set(excludes)):
93
+ entry_stat = dir_entry.stat()
94
+ file_size = entry_stat.st_size
95
+ if not (min_file_size <= file_size <= max_file_size):
96
+ continue
78
97
 
79
- start_time = time.perf_counter()
80
- file_content = Path(dir_entry.path).read_bytes()
81
- duration = time.perf_counter() - start_time
82
- total_read_time += duration
83
-
84
- for algo in algorithms:
85
- # Actual measurement:
86
98
  start_time = time.perf_counter()
87
- hashlib.new(algo, file_content)
99
+ file_content = Path(dir_entry.path).read_bytes()
88
100
  duration = time.perf_counter() - start_time
89
-
90
- results[algo].add(duration)
91
-
92
- file_count += 1
93
- total_size += entry_stat.st_size
94
-
95
- now = time.time()
96
- if now >= stop_time:
97
- print('Reached max duration limit, stopping benchmark...')
98
- break
99
-
100
- if now >= next_update:
101
- percent = (now - (stop_time - max_duration)) / max_duration * 100
102
- print(
103
- f'{int(percent)}% Processed {file_count} files so far,'
104
- f' total size: {total_size / 1024 / 1024:.1f} MiB...'
105
- )
106
- next_update = now + 2
101
+ total_read_time += duration
102
+
103
+ for algo in algorithms:
104
+ # Actual measurement:
105
+ start_time = time.perf_counter()
106
+ hashlib.new(algo, file_content)
107
+ duration = time.perf_counter() - start_time
108
+
109
+ results[algo].add(duration)
110
+
111
+ file_count += 1
112
+ total_size += entry_stat.st_size
113
+
114
+ now = time.time()
115
+ if now >= stop_time:
116
+ print('Reached max duration limit, stopping benchmark...')
117
+ break
118
+
119
+ if now >= next_update:
120
+ percent = (now - (stop_time - max_duration)) / max_duration * 100
121
+ print(
122
+ f'{int(percent)}% Processed {file_count} files so far,'
123
+ f' total size: {total_size / 1024 / 1024:.1f} MiB...'
124
+ )
125
+ next_update = now + 2
107
126
 
108
127
  print(f'\nTotal files hashed: {file_count}, total size: {total_size / 1024 / 1024:.1f} MiB')
109
128
 
@@ -0,0 +1,124 @@
1
+ import logging
2
+ import sys
3
+ from pathlib import Path
4
+ from typing import Annotated, Literal
5
+
6
+ import tyro
7
+ from bx_py_utils.path import assert_is_dir
8
+ from rich import (
9
+ get_console,
10
+ print, # noqa
11
+ )
12
+ from rich.logging import RichHandler
13
+
14
+
15
+ logger = logging.getLogger(__name__)
16
+
17
+ LogLevelLiteral = Literal['debug', 'info', 'warning', 'error']
18
+
19
+
20
+ TyroConsoleLogLevelArgType = Annotated[
21
+ LogLevelLiteral,
22
+ tyro.conf.arg(
23
+ help='Log level for console logging.',
24
+ ),
25
+ ]
26
+ DEFAULT_CONSOLE_LOG_LEVEL: TyroConsoleLogLevelArgType = 'warning'
27
+
28
+
29
+ TyroLogFileLevelArgType = Annotated[
30
+ LogLevelLiteral,
31
+ tyro.conf.arg(
32
+ help='Log level for the log file',
33
+ ),
34
+ ]
35
+ DEFAULT_LOG_FILE_LEVEL: TyroLogFileLevelArgType = 'info'
36
+
37
+
38
+ def log_level_name2int(level_name: str) -> int:
39
+ level_name = level_name.upper()
40
+ level_mapping = logging.getLevelNamesMapping()
41
+ try:
42
+ return level_mapping[level_name]
43
+ except KeyError as err:
44
+ raise ValueError(f'Invalid log level name: {level_name}') from err
45
+
46
+
47
+ console = get_console()
48
+
49
+
50
+ class LoggingManager:
51
+ def __init__(
52
+ self,
53
+ *,
54
+ console_level: TyroConsoleLogLevelArgType,
55
+ file_level: TyroLogFileLevelArgType,
56
+ ):
57
+ self.console_level_name = console_level
58
+ self.console_level: int = log_level_name2int(console_level)
59
+ self.file_level_name = file_level
60
+ self.file_level: int = log_level_name2int(file_level)
61
+
62
+ self.lowest_level = min(self.console_level, self.file_level)
63
+
64
+ if console_level == logging.DEBUG:
65
+ log_format = '(%(name)s) %(message)s'
66
+ else:
67
+ log_format = '%(message)s'
68
+
69
+ console.print(
70
+ f'(Set [bold]console[bold] log level: [cyan]{self.console_level_name}[/cyan])',
71
+ justify='right',
72
+ )
73
+ handler = RichHandler(console=console, omit_repeated_times=False)
74
+ handler.setLevel(self.console_level)
75
+ logging.basicConfig(
76
+ level=self.lowest_level,
77
+ format=log_format,
78
+ datefmt='[%x %X.%f]',
79
+ handlers=[handler],
80
+ force=True,
81
+ )
82
+ sys.excepthook = self.log_unhandled_exception
83
+
84
+ def start_file_logging(self, log_file: Path):
85
+ console.print(
86
+ f'(initialize log file [bold]{log_file}[/bold] with level: [cyan]{self.file_level_name}[/cyan])',
87
+ justify='right',
88
+ )
89
+
90
+ assert_is_dir(log_file.parent)
91
+
92
+ root_logger = logging.getLogger()
93
+
94
+ file_handler = logging.FileHandler(log_file, mode='a', encoding='utf-8')
95
+ file_handler.setLevel(self.file_level)
96
+
97
+ formatter = logging.Formatter(
98
+ '%(asctime)s - %(name)s - %(levelname)s - %(message)s',
99
+ datefmt='%Y-%m-%d %H:%M:%S',
100
+ )
101
+ file_handler.setFormatter(formatter)
102
+
103
+ root_logger.addHandler(file_handler)
104
+
105
+ def log_unhandled_exception(self, exc_type, exc_value, exc_traceback):
106
+ if issubclass(exc_type, KeyboardInterrupt):
107
+ logger.info('Program interrupted by user (KeyboardInterrupt). Exiting...')
108
+ sys.__excepthook__(exc_type, exc_value, exc_traceback)
109
+ else:
110
+ logger.exception(
111
+ 'Unhandled exception occurred:',
112
+ exc_info=(exc_type, exc_value, exc_traceback),
113
+ )
114
+
115
+
116
+ class NoopLoggingManager(LoggingManager):
117
+ """
118
+ Only for tests: A logging manager that does nothing.
119
+ """
120
+ def __init__(self, *args, **kwargs):
121
+ pass
122
+
123
+ def start_file_logging(self, log_file: Path):
124
+ pass