PyHardLinkBackup 1.2.0__tar.gz → 1.4.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {pyhardlinkbackup-1.2.0 → pyhardlinkbackup-1.4.0}/.github/workflows/tests.yml +2 -1
- {pyhardlinkbackup-1.2.0 → pyhardlinkbackup-1.4.0}/PKG-INFO +54 -27
- {pyhardlinkbackup-1.2.0 → pyhardlinkbackup-1.4.0}/PyHardLinkBackup/__init__.py +1 -1
- {pyhardlinkbackup-1.2.0 → pyhardlinkbackup-1.4.0}/PyHardLinkBackup/backup.py +52 -44
- pyhardlinkbackup-1.4.0/PyHardLinkBackup/cli_app/phlb.py +87 -0
- {pyhardlinkbackup-1.2.0 → pyhardlinkbackup-1.4.0}/PyHardLinkBackup/cli_dev/benchmark.py +51 -32
- pyhardlinkbackup-1.4.0/PyHardLinkBackup/logging_setup.py +124 -0
- pyhardlinkbackup-1.4.0/PyHardLinkBackup/rebuild_databases.py +176 -0
- {pyhardlinkbackup-1.2.0 → pyhardlinkbackup-1.4.0}/PyHardLinkBackup/tests/test_backup.py +83 -29
- pyhardlinkbackup-1.4.0/PyHardLinkBackup/tests/test_rebuild_database.py +224 -0
- {pyhardlinkbackup-1.2.0 → pyhardlinkbackup-1.4.0}/PyHardLinkBackup/utilities/file_hash_database.py +4 -0
- {pyhardlinkbackup-1.2.0 → pyhardlinkbackup-1.4.0}/PyHardLinkBackup/utilities/humanize.py +17 -0
- {pyhardlinkbackup-1.2.0 → pyhardlinkbackup-1.4.0}/PyHardLinkBackup/utilities/rich_utils.py +8 -8
- pyhardlinkbackup-1.4.0/PyHardLinkBackup/utilities/sha256sums.py +61 -0
- pyhardlinkbackup-1.4.0/PyHardLinkBackup/utilities/tee.py +40 -0
- {pyhardlinkbackup-1.2.0 → pyhardlinkbackup-1.4.0}/PyHardLinkBackup/utilities/tests/test_file_hash_database.py +3 -1
- {pyhardlinkbackup-1.2.0 → pyhardlinkbackup-1.4.0}/PyHardLinkBackup/utilities/tests/test_file_size_database.py +1 -1
- pyhardlinkbackup-1.4.0/PyHardLinkBackup/utilities/tyro_cli_shared_args.py +12 -0
- {pyhardlinkbackup-1.2.0 → pyhardlinkbackup-1.4.0}/README.md +53 -26
- {pyhardlinkbackup-1.2.0 → pyhardlinkbackup-1.4.0}/docs/README.md +8 -0
- pyhardlinkbackup-1.2.0/PyHardLinkBackup/cli_app/phlb.py +0 -50
- {pyhardlinkbackup-1.2.0 → pyhardlinkbackup-1.4.0}/.editorconfig +0 -0
- {pyhardlinkbackup-1.2.0 → pyhardlinkbackup-1.4.0}/.gitignore +0 -0
- {pyhardlinkbackup-1.2.0 → pyhardlinkbackup-1.4.0}/.idea/.gitignore +0 -0
- {pyhardlinkbackup-1.2.0 → pyhardlinkbackup-1.4.0}/.pre-commit-config.yaml +0 -0
- {pyhardlinkbackup-1.2.0 → pyhardlinkbackup-1.4.0}/.pre-commit-hooks.yaml +0 -0
- {pyhardlinkbackup-1.2.0 → pyhardlinkbackup-1.4.0}/.run/Template Python tests.run.xml +0 -0
- {pyhardlinkbackup-1.2.0 → pyhardlinkbackup-1.4.0}/.run/Unittests - __all__.run.xml +0 -0
- {pyhardlinkbackup-1.2.0 → pyhardlinkbackup-1.4.0}/.run/cli.py --help.run.xml +0 -0
- {pyhardlinkbackup-1.2.0 → pyhardlinkbackup-1.4.0}/.run/dev-cli update.run.xml +0 -0
- {pyhardlinkbackup-1.2.0 → pyhardlinkbackup-1.4.0}/.run/only DocTests.run.xml +0 -0
- {pyhardlinkbackup-1.2.0 → pyhardlinkbackup-1.4.0}/.run/only DocWrite.run.xml +0 -0
- {pyhardlinkbackup-1.2.0 → pyhardlinkbackup-1.4.0}/.venv-app/lib/python3.12/site-packages/cli_base/tests/shell_complete_snapshots/.gitignore +0 -0
- {pyhardlinkbackup-1.2.0 → pyhardlinkbackup-1.4.0}/PyHardLinkBackup/__main__.py +0 -0
- {pyhardlinkbackup-1.2.0 → pyhardlinkbackup-1.4.0}/PyHardLinkBackup/cli_app/__init__.py +0 -0
- {pyhardlinkbackup-1.2.0 → pyhardlinkbackup-1.4.0}/PyHardLinkBackup/cli_dev/__init__.py +0 -0
- {pyhardlinkbackup-1.2.0 → pyhardlinkbackup-1.4.0}/PyHardLinkBackup/cli_dev/code_style.py +0 -0
- {pyhardlinkbackup-1.2.0 → pyhardlinkbackup-1.4.0}/PyHardLinkBackup/cli_dev/packaging.py +0 -0
- {pyhardlinkbackup-1.2.0 → pyhardlinkbackup-1.4.0}/PyHardLinkBackup/cli_dev/shell_completion.py +0 -0
- {pyhardlinkbackup-1.2.0 → pyhardlinkbackup-1.4.0}/PyHardLinkBackup/cli_dev/testing.py +0 -0
- {pyhardlinkbackup-1.2.0 → pyhardlinkbackup-1.4.0}/PyHardLinkBackup/cli_dev/update_readme_history.py +0 -0
- {pyhardlinkbackup-1.2.0 → pyhardlinkbackup-1.4.0}/PyHardLinkBackup/constants.py +0 -0
- {pyhardlinkbackup-1.2.0 → pyhardlinkbackup-1.4.0}/PyHardLinkBackup/tests/__init__.py +0 -0
- {pyhardlinkbackup-1.2.0 → pyhardlinkbackup-1.4.0}/PyHardLinkBackup/tests/test_doc_write.py +0 -0
- {pyhardlinkbackup-1.2.0 → pyhardlinkbackup-1.4.0}/PyHardLinkBackup/tests/test_doctests.py +0 -0
- {pyhardlinkbackup-1.2.0 → pyhardlinkbackup-1.4.0}/PyHardLinkBackup/tests/test_project_setup.py +0 -0
- {pyhardlinkbackup-1.2.0 → pyhardlinkbackup-1.4.0}/PyHardLinkBackup/tests/test_readme.py +0 -0
- {pyhardlinkbackup-1.2.0 → pyhardlinkbackup-1.4.0}/PyHardLinkBackup/tests/test_readme_history.py +0 -0
- {pyhardlinkbackup-1.2.0 → pyhardlinkbackup-1.4.0}/PyHardLinkBackup/utilities/__init__.py +0 -0
- {pyhardlinkbackup-1.2.0 → pyhardlinkbackup-1.4.0}/PyHardLinkBackup/utilities/file_size_database.py +0 -0
- {pyhardlinkbackup-1.2.0 → pyhardlinkbackup-1.4.0}/PyHardLinkBackup/utilities/filesystem.py +0 -0
- {pyhardlinkbackup-1.2.0 → pyhardlinkbackup-1.4.0}/PyHardLinkBackup/utilities/tests/__init__.py +0 -0
- {pyhardlinkbackup-1.2.0 → pyhardlinkbackup-1.4.0}/PyHardLinkBackup/utilities/tests/test_filesystem.py +0 -0
- {pyhardlinkbackup-1.2.0 → pyhardlinkbackup-1.4.0}/cli.py +0 -0
- {pyhardlinkbackup-1.2.0 → pyhardlinkbackup-1.4.0}/dev-cli.py +0 -0
- {pyhardlinkbackup-1.2.0 → pyhardlinkbackup-1.4.0}/dist/.gitignore +0 -0
- {pyhardlinkbackup-1.2.0 → pyhardlinkbackup-1.4.0}/docs/about-docs.md +0 -0
- {pyhardlinkbackup-1.2.0 → pyhardlinkbackup-1.4.0}/noxfile.py +0 -0
- {pyhardlinkbackup-1.2.0 → pyhardlinkbackup-1.4.0}/pyproject.toml +0 -0
- {pyhardlinkbackup-1.2.0 → pyhardlinkbackup-1.4.0}/uv.lock +0 -0
|
@@ -13,11 +13,12 @@ on:
|
|
|
13
13
|
|
|
14
14
|
jobs:
|
|
15
15
|
test:
|
|
16
|
-
runs-on: ubuntu-latest
|
|
17
16
|
strategy:
|
|
18
17
|
fail-fast: false
|
|
19
18
|
matrix:
|
|
20
19
|
python-version: ["3.14", "3.13", "3.12"]
|
|
20
|
+
os: [ubuntu-latest, macos-latest] # TODO: windows-latest
|
|
21
|
+
runs-on: ${{ matrix.os }}
|
|
21
22
|
steps:
|
|
22
23
|
- name: Checkout
|
|
23
24
|
run: |
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: PyHardLinkBackup
|
|
3
|
-
Version: 1.
|
|
3
|
+
Version: 1.4.0
|
|
4
4
|
Summary: HardLink/Deduplication Backups with Python
|
|
5
5
|
Project-URL: Documentation, https://github.com/jedie/PyHardLinkBackup
|
|
6
6
|
Project-URL: Source, https://github.com/jedie/PyHardLinkBackup
|
|
@@ -42,43 +42,58 @@ The main command is `phlb backup <source> <destination>`:
|
|
|
42
42
|
|
|
43
43
|
[comment]: <> (✂✂✂ auto generated backup help start ✂✂✂)
|
|
44
44
|
```
|
|
45
|
-
usage: phlb backup [-h]
|
|
45
|
+
usage: phlb backup [-h] [BACKUP OPTIONS]
|
|
46
46
|
|
|
47
47
|
Backup the source directory to the destination directory using hard links for deduplication.
|
|
48
48
|
|
|
49
|
-
╭─ positional arguments
|
|
50
|
-
│ source Source directory to back up. (required)
|
|
51
|
-
│ destination Destination directory for the backup. (required)
|
|
52
|
-
|
|
53
|
-
╭─ options
|
|
54
|
-
│ -h, --help show this help message and exit
|
|
55
|
-
│ --excludes [STR [STR ...]]
|
|
56
|
-
│ List of
|
|
57
|
-
│
|
|
58
|
-
│
|
|
59
|
-
│
|
|
60
|
-
|
|
49
|
+
╭─ positional arguments ──────────────────────────────────────────────────────────────────────────────────────╮
|
|
50
|
+
│ source Source directory to back up. (required) │
|
|
51
|
+
│ destination Destination directory for the backup. (required) │
|
|
52
|
+
╰─────────────────────────────────────────────────────────────────────────────────────────────────────────────╯
|
|
53
|
+
╭─ options ───────────────────────────────────────────────────────────────────────────────────────────────────╮
|
|
54
|
+
│ -h, --help show this help message and exit │
|
|
55
|
+
│ --excludes [STR [STR ...]] │
|
|
56
|
+
│ List of directories to exclude from backup. (default: __pycache__ .cache .temp .tmp .tox .nox) │
|
|
57
|
+
│ --verbosity {debug,info,warning,error} │
|
|
58
|
+
│ Log level for console logging. (default: warning) │
|
|
59
|
+
│ --log-file-level {debug,info,warning,error} │
|
|
60
|
+
│ Log level for the log file (default: info) │
|
|
61
|
+
╰─────────────────────────────────────────────────────────────────────────────────────────────────────────────╯
|
|
61
62
|
```
|
|
62
63
|
[comment]: <> (✂✂✂ auto generated backup help end ✂✂✂)
|
|
63
64
|
|
|
64
65
|
|
|
65
66
|
|
|
67
|
+
Running a backup looks like:
|
|
68
|
+
|
|
69
|
+

|
|
70
|
+
|
|
71
|
+
|
|
72
|
+
|
|
73
|
+
When it's finished, it displays a summary:
|
|
74
|
+
|
|
75
|
+

|
|
76
|
+
|
|
77
|
+
|
|
78
|
+
|
|
66
79
|
complete help for main CLI app:
|
|
67
80
|
|
|
68
81
|
[comment]: <> (✂✂✂ auto generated main help start ✂✂✂)
|
|
69
82
|
```
|
|
70
|
-
usage: phlb [-h] {backup,version}
|
|
83
|
+
usage: phlb [-h] {backup,rebuild,version}
|
|
71
84
|
|
|
72
85
|
|
|
73
86
|
|
|
74
|
-
╭─ options
|
|
75
|
-
│ -h, --help show this help message and exit
|
|
76
|
-
|
|
77
|
-
╭─ subcommands
|
|
78
|
-
│ (required)
|
|
79
|
-
│ • backup Backup the source directory to the destination directory using hard links for deduplication.
|
|
80
|
-
│ •
|
|
81
|
-
|
|
87
|
+
╭─ options ────────────────────────────────────────────────────────────────────────────────────────────────────────────╮
|
|
88
|
+
│ -h, --help show this help message and exit │
|
|
89
|
+
╰──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯
|
|
90
|
+
╭─ subcommands ────────────────────────────────────────────────────────────────────────────────────────────────────────╮
|
|
91
|
+
│ (required) │
|
|
92
|
+
│ • backup Backup the source directory to the destination directory using hard links for deduplication. │
|
|
93
|
+
│ • rebuild Rebuild the file hash and size database by scanning all backup files. And also verify SHA256SUMS and/or │
|
|
94
|
+
│ store missing hashes in SHA256SUMS files. │
|
|
95
|
+
│ • version Print version and exit │
|
|
96
|
+
╰──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯
|
|
82
97
|
```
|
|
83
98
|
[comment]: <> (✂✂✂ auto generated main help end ✂✂✂)
|
|
84
99
|
|
|
@@ -160,7 +175,7 @@ The file contains only the relative path to the first hardlink of this file cont
|
|
|
160
175
|
|
|
161
176
|
[comment]: <> (✂✂✂ auto generated dev help start ✂✂✂)
|
|
162
177
|
```
|
|
163
|
-
usage: ./dev-cli.py [-h] {benchmark-hashes,coverage,install,lint,mypy,nox,pip-audit,publish,shell-completion,test,update,update-readme-history,update-test-snapshot-files,version}
|
|
178
|
+
usage: ./dev-cli.py [-h] {benchmark-hashes,coverage,install,lint,mypy,nox,pip-audit,publish,scan-benchmark,shell-completion,test,update,update-readme-history,update-test-snapshot-files,version}
|
|
164
179
|
|
|
165
180
|
|
|
166
181
|
|
|
@@ -178,6 +193,8 @@ usage: ./dev-cli.py [-h] {benchmark-hashes,coverage,install,lint,mypy,nox,pip-au
|
|
|
178
193
|
│ • nox Run nox │
|
|
179
194
|
│ • pip-audit Run pip-audit check against current requirements files │
|
|
180
195
|
│ • publish Build and upload this project to PyPi │
|
|
196
|
+
│ • scan-benchmark │
|
|
197
|
+
│ Benchmark our filesystem scan routine. │
|
|
181
198
|
│ • shell-completion │
|
|
182
199
|
│ Setup shell completion for this CLI (Currently only for bash shell) │
|
|
183
200
|
│ • test Run unittests │
|
|
@@ -214,6 +231,16 @@ Overview of main changes:
|
|
|
214
231
|
|
|
215
232
|
[comment]: <> (✂✂✂ auto generated history start ✂✂✂)
|
|
216
233
|
|
|
234
|
+
* [v1.4.0](https://github.com/jedie/PyHardLinkBackup/compare/v1.3.0...v1.4.0)
|
|
235
|
+
* 2026-01-16 - Create log file in backup and a summary.txt
|
|
236
|
+
* 2026-01-16 - Run CI tests on macos, too.
|
|
237
|
+
* 2026-01-16 - add dev cli command "scan-benchmark"
|
|
238
|
+
* [v1.3.0](https://github.com/jedie/PyHardLinkBackup/compare/v1.2.0...v1.3.0)
|
|
239
|
+
* 2026-01-15 - Verify SHA256SUMS files in "rebuild" command, too.
|
|
240
|
+
* 2026-01-15 - Code cleanup: use more generic names for and in BackupProgress
|
|
241
|
+
* 2026-01-15 - Add tests for rebuild
|
|
242
|
+
* 2026-01-15 - Add command to "rebuild" the size and hash filesystem database
|
|
243
|
+
* 2026-01-15 - Add screenshots in the README
|
|
217
244
|
* [v1.2.0](https://github.com/jedie/PyHardLinkBackup/compare/v1.1.0...v1.2.0)
|
|
218
245
|
* 2026-01-15 - Add error handling: Log exception but continue with the backup
|
|
219
246
|
* 2026-01-15 - Check permission and hardlink support on destination path
|
|
@@ -223,6 +250,9 @@ Overview of main changes:
|
|
|
223
250
|
* [v1.1.0](https://github.com/jedie/PyHardLinkBackup/compare/v1.0.1...v1.1.0)
|
|
224
251
|
* 2026-01-14 - Change backup timestamp directory to old schema: '%Y-%m-%d-%H%M%S'
|
|
225
252
|
* 2026-01-14 - Add "Overview of main changes" to README
|
|
253
|
+
|
|
254
|
+
<details><summary>Expand older history entries ...</summary>
|
|
255
|
+
|
|
226
256
|
* [v1.0.1](https://github.com/jedie/PyHardLinkBackup/compare/v1.0.0...v1.0.1)
|
|
227
257
|
* 2026-01-13 - Store SHA256SUMS files in backup directories
|
|
228
258
|
* [v1.0.0](https://github.com/jedie/PyHardLinkBackup/compare/v0.13.0...v1.0.0)
|
|
@@ -235,9 +265,6 @@ Overview of main changes:
|
|
|
235
265
|
* 2026-01-13 - Add DocWrite, handle broken symlinks, keep file meta, handle missing hardlink sources
|
|
236
266
|
* 2026-01-12 - First working iteration with rich progress bar
|
|
237
267
|
* 2026-01-08 - Rewrite everything
|
|
238
|
-
|
|
239
|
-
<details><summary>Expand older history entries ...</summary>
|
|
240
|
-
|
|
241
268
|
* [v0.13.0](https://github.com/jedie/PyHardLinkBackup/compare/v0.12.3...v0.13.0)
|
|
242
269
|
* 2020-03-18 - release v0.13.0
|
|
243
270
|
* 2020-03-17 - deactivate pypy tests in travis, because of SQLite errors, like:
|
|
@@ -1,15 +1,16 @@
|
|
|
1
1
|
import dataclasses
|
|
2
|
+
import datetime
|
|
2
3
|
import logging
|
|
3
4
|
import os
|
|
4
5
|
import shutil
|
|
5
6
|
import sys
|
|
6
7
|
import time
|
|
7
|
-
from datetime import datetime
|
|
8
8
|
from pathlib import Path
|
|
9
9
|
|
|
10
10
|
from rich import print # noqa
|
|
11
11
|
|
|
12
12
|
from PyHardLinkBackup.constants import CHUNK_SIZE
|
|
13
|
+
from PyHardLinkBackup.logging_setup import LoggingManager
|
|
13
14
|
from PyHardLinkBackup.utilities.file_hash_database import FileHashDatabase
|
|
14
15
|
from PyHardLinkBackup.utilities.file_size_database import FileSizeDatabase
|
|
15
16
|
from PyHardLinkBackup.utilities.filesystem import (
|
|
@@ -20,8 +21,10 @@ from PyHardLinkBackup.utilities.filesystem import (
|
|
|
20
21
|
read_and_hash_file,
|
|
21
22
|
supports_hardlinks,
|
|
22
23
|
)
|
|
23
|
-
from PyHardLinkBackup.utilities.humanize import human_filesize
|
|
24
|
-
from PyHardLinkBackup.utilities.rich_utils import
|
|
24
|
+
from PyHardLinkBackup.utilities.humanize import PrintTimingContextManager, human_filesize
|
|
25
|
+
from PyHardLinkBackup.utilities.rich_utils import DisplayFileTreeProgress
|
|
26
|
+
from PyHardLinkBackup.utilities.sha256sums import store_hash
|
|
27
|
+
from PyHardLinkBackup.utilities.tee import TeeStdoutContext
|
|
25
28
|
|
|
26
29
|
|
|
27
30
|
logger = logging.getLogger(__name__)
|
|
@@ -30,6 +33,7 @@ logger = logging.getLogger(__name__)
|
|
|
30
33
|
@dataclasses.dataclass
|
|
31
34
|
class BackupResult:
|
|
32
35
|
backup_dir: Path
|
|
36
|
+
log_file: Path
|
|
33
37
|
#
|
|
34
38
|
backup_count: int = 0
|
|
35
39
|
backup_size: int = 0
|
|
@@ -151,23 +155,13 @@ def backup_one_file(
|
|
|
151
155
|
store_hash(dst_path, file_hash)
|
|
152
156
|
|
|
153
157
|
|
|
154
|
-
def
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
cd .../your/backup/foobar/20240101_120000/
|
|
162
|
-
sha256sum -c SHA256SUMS
|
|
163
|
-
```
|
|
164
|
-
"""
|
|
165
|
-
hash_file_path = file_path.parent / 'SHA256SUMS'
|
|
166
|
-
with hash_file_path.open('a') as f:
|
|
167
|
-
f.write(f'{file_hash} {file_path.name}\n')
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
def backup_tree(*, src_root: Path, backup_root: Path, excludes: set[str]) -> BackupResult:
|
|
158
|
+
def backup_tree(
|
|
159
|
+
*,
|
|
160
|
+
src_root: Path,
|
|
161
|
+
backup_root: Path,
|
|
162
|
+
excludes: tuple[str, ...],
|
|
163
|
+
log_manager: LoggingManager,
|
|
164
|
+
) -> BackupResult:
|
|
171
165
|
src_root = src_root.resolve()
|
|
172
166
|
if not src_root.is_dir():
|
|
173
167
|
print('Error: Source directory does not exist!')
|
|
@@ -191,23 +185,31 @@ def backup_tree(*, src_root: Path, backup_root: Path, excludes: set[str]) -> Bac
|
|
|
191
185
|
sys.exit(1)
|
|
192
186
|
|
|
193
187
|
# Step 1: Scan source directory:
|
|
194
|
-
|
|
188
|
+
excludes: set = set(excludes)
|
|
189
|
+
with PrintTimingContextManager('Filesystem scan completed in'):
|
|
190
|
+
src_file_count, src_total_size = humanized_fs_scan(src_root, excludes=excludes)
|
|
195
191
|
|
|
196
192
|
phlb_conf_dir = backup_root / '.phlb'
|
|
197
193
|
phlb_conf_dir.mkdir(parents=False, exist_ok=True)
|
|
198
194
|
|
|
199
|
-
|
|
200
|
-
|
|
195
|
+
timestamp = datetime.datetime.now().strftime('%Y-%m-%d-%H%M%S')
|
|
196
|
+
backup_main_dir = backup_root / src_root.name
|
|
197
|
+
backup_dir = backup_main_dir / timestamp
|
|
201
198
|
backup_dir.mkdir(parents=True, exist_ok=False)
|
|
202
199
|
|
|
200
|
+
log_file = backup_main_dir / f'{timestamp}-backup.log'
|
|
201
|
+
log_manager.start_file_logging(log_file)
|
|
202
|
+
|
|
203
|
+
logger.info('Backup %s to %s', src_root, backup_dir)
|
|
204
|
+
|
|
203
205
|
print(f'\nBackup to {backup_dir}...\n')
|
|
204
206
|
|
|
205
|
-
with
|
|
207
|
+
with DisplayFileTreeProgress(src_file_count, src_total_size) as progress:
|
|
206
208
|
# "Databases" for deduplication
|
|
207
209
|
size_db = FileSizeDatabase(phlb_conf_dir)
|
|
208
210
|
hash_db = FileHashDatabase(backup_root, phlb_conf_dir)
|
|
209
211
|
|
|
210
|
-
backup_result = BackupResult(backup_dir=backup_dir)
|
|
212
|
+
backup_result = BackupResult(backup_dir=backup_dir, log_file=log_file)
|
|
211
213
|
|
|
212
214
|
next_update = 0
|
|
213
215
|
for entry in iter_scandir_files(src_root, excludes=excludes):
|
|
@@ -226,27 +228,33 @@ def backup_tree(*, src_root: Path, backup_root: Path, excludes: set[str]) -> Bac
|
|
|
226
228
|
else:
|
|
227
229
|
now = time.monotonic()
|
|
228
230
|
if now >= next_update:
|
|
229
|
-
progress.update(
|
|
231
|
+
progress.update(
|
|
232
|
+
completed_file_count=backup_result.backup_count, completed_size=backup_result.backup_size
|
|
233
|
+
)
|
|
230
234
|
next_update = now + 0.5
|
|
231
235
|
|
|
232
236
|
# Finalize progress indicator values:
|
|
233
|
-
progress.update(
|
|
234
|
-
|
|
235
|
-
|
|
236
|
-
|
|
237
|
-
|
|
238
|
-
|
|
239
|
-
f' *
|
|
240
|
-
|
|
241
|
-
|
|
242
|
-
|
|
243
|
-
|
|
244
|
-
f'
|
|
245
|
-
|
|
246
|
-
|
|
247
|
-
|
|
248
|
-
|
|
249
|
-
|
|
250
|
-
|
|
237
|
+
progress.update(completed_file_count=backup_result.backup_count, completed_size=backup_result.backup_size)
|
|
238
|
+
|
|
239
|
+
summary_file = backup_main_dir / f'{timestamp}-summary.txt'
|
|
240
|
+
with TeeStdoutContext(summary_file):
|
|
241
|
+
print(f'\nBackup complete: {backup_dir} (total size {human_filesize(backup_result.backup_size)})\n')
|
|
242
|
+
print(f' Total files processed: {backup_result.backup_count}')
|
|
243
|
+
print(f' * Symlinked files: {backup_result.symlink_files}')
|
|
244
|
+
print(
|
|
245
|
+
f' * Hardlinked files: {backup_result.hardlinked_files}'
|
|
246
|
+
f' (saved {human_filesize(backup_result.hardlinked_size)})'
|
|
247
|
+
)
|
|
248
|
+
print(f' * Copied files: {backup_result.copied_files} (total {human_filesize(backup_result.copied_size)})')
|
|
249
|
+
print(
|
|
250
|
+
f' of which small (<{size_db.MIN_SIZE} Bytes)'
|
|
251
|
+
f' files: {backup_result.copied_small_files}'
|
|
252
|
+
f' (total {human_filesize(backup_result.copied_small_size)})'
|
|
253
|
+
)
|
|
254
|
+
if backup_result.error_count > 0:
|
|
255
|
+
print(f' Errors during backup: {backup_result.error_count} (see log for details)')
|
|
256
|
+
print()
|
|
257
|
+
|
|
258
|
+
logger.info('Backup completed. Summary created: %s', summary_file)
|
|
251
259
|
|
|
252
260
|
return backup_result
|
|
@@ -0,0 +1,87 @@
|
|
|
1
|
+
import logging
|
|
2
|
+
from pathlib import Path
|
|
3
|
+
from typing import Annotated
|
|
4
|
+
|
|
5
|
+
import tyro
|
|
6
|
+
from rich import print # noqa
|
|
7
|
+
|
|
8
|
+
from PyHardLinkBackup import rebuild_databases
|
|
9
|
+
from PyHardLinkBackup.backup import backup_tree
|
|
10
|
+
from PyHardLinkBackup.cli_app import app
|
|
11
|
+
from PyHardLinkBackup.logging_setup import (
|
|
12
|
+
DEFAULT_CONSOLE_LOG_LEVEL,
|
|
13
|
+
DEFAULT_LOG_FILE_LEVEL,
|
|
14
|
+
LoggingManager,
|
|
15
|
+
TyroConsoleLogLevelArgType,
|
|
16
|
+
TyroLogFileLevelArgType,
|
|
17
|
+
)
|
|
18
|
+
from PyHardLinkBackup.utilities.tyro_cli_shared_args import (
|
|
19
|
+
DEFAULT_EXCLUDE_DIRECTORIES,
|
|
20
|
+
TyroExcludeDirectoriesArgType,
|
|
21
|
+
)
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
logger = logging.getLogger(__name__)
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
@app.command
def backup(
    src: Annotated[
        Path,
        tyro.conf.arg(
            metavar='source',
            help='Source directory to back up.',
        ),
    ],
    dst: Annotated[
        Path,
        tyro.conf.arg(
            metavar='destination',
            help='Destination directory for the backup.',
        ),
    ],
    /,
    excludes: TyroExcludeDirectoriesArgType = DEFAULT_EXCLUDE_DIRECTORIES,
    verbosity: TyroConsoleLogLevelArgType = DEFAULT_CONSOLE_LOG_LEVEL,
    log_file_level: TyroLogFileLevelArgType = DEFAULT_LOG_FILE_LEVEL,
) -> None:
    """
    Backup the source directory to the destination directory using hard links for deduplication.
    """
    # NOTE: The docstring above doubles as the CLI help text, so keep it as is.
    #
    # The logging manager is created first (this configures console logging);
    # it is then handed over to backup_tree(), which starts the log file once
    # the backup directory is known.
    backup_tree(
        src_root=src,
        backup_root=dst,
        excludes=excludes,
        log_manager=LoggingManager(console_level=verbosity, file_level=log_file_level),
    )
|
|
61
|
+
|
|
62
|
+
|
|
63
|
+
@app.command
def rebuild(
    backup_root: Annotated[
        Path,
        tyro.conf.arg(
            metavar='backup-directory',
            help='Root directory of the backups.',
        ),
    ],
    /,
    verbosity: TyroConsoleLogLevelArgType = DEFAULT_CONSOLE_LOG_LEVEL,
    log_file_level: TyroLogFileLevelArgType = DEFAULT_LOG_FILE_LEVEL,
) -> None:
    """
    Rebuild the file hash and size database by scanning all backup files. And also verify SHA256SUMS
    and/or store missing hashes in SHA256SUMS files.
    """
    # NOTE: The docstring above doubles as the CLI help text.
    #
    # Console logging is configured by creating the manager; the manager is
    # passed on so that the rebuild routine can use it for its own logging setup.
    log_manager = LoggingManager(
        console_level=verbosity,
        file_level=log_file_level,
    )
    rebuild_databases.rebuild(
        backup_root=backup_root,
        log_manager=log_manager,
    )
|
|
@@ -10,16 +10,34 @@ from cli_base.tyro_commands import TyroVerbosityArgType
|
|
|
10
10
|
from rich import print # noqa
|
|
11
11
|
|
|
12
12
|
from PyHardLinkBackup.cli_dev import app
|
|
13
|
-
from PyHardLinkBackup.utilities.filesystem import iter_scandir_files
|
|
13
|
+
from PyHardLinkBackup.utilities.filesystem import humanized_fs_scan, iter_scandir_files
|
|
14
|
+
from PyHardLinkBackup.utilities.humanize import PrintTimingContextManager
|
|
15
|
+
from PyHardLinkBackup.utilities.tyro_cli_shared_args import DEFAULT_EXCLUDE_DIRECTORIES, TyroExcludeDirectoriesArgType
|
|
14
16
|
|
|
15
17
|
|
|
16
18
|
logger = logging.getLogger(__name__)
|
|
17
19
|
|
|
18
20
|
|
|
21
|
+
@app.command
def scan_benchmark(
    base_path: Path,
    /,
    excludes: TyroExcludeDirectoriesArgType = DEFAULT_EXCLUDE_DIRECTORIES,
    verbosity: TyroVerbosityArgType = 1,
) -> None:
    """
    Benchmark our filesystem scan routine.
    """
    # NOTE: The docstring above doubles as the CLI help text.
    setup_logging(verbosity=verbosity)

    scan_timer = PrintTimingContextManager('Filesystem scan completed in')
    with scan_timer:
        # The scan helper expects a set, the CLI argument is a sequence.
        # The scan result is not needed here: only the timing output matters.
        exclude_names = set(excludes)
        humanized_fs_scan(path=base_path, excludes=exclude_names)
|
|
34
|
+
|
|
35
|
+
|
|
19
36
|
@app.command
|
|
20
37
|
def benchmark_hashes(
|
|
21
38
|
base_path: Path,
|
|
22
39
|
/,
|
|
40
|
+
excludes: TyroExcludeDirectoriesArgType = DEFAULT_EXCLUDE_DIRECTORIES,
|
|
23
41
|
max_duration: int = 30, # in seconds
|
|
24
42
|
min_file_size: int = 15 * 1024, # 15 KiB
|
|
25
43
|
max_file_size: int = 100 * 1024 * 1024, # 100 MiB
|
|
@@ -70,40 +88,41 @@ def benchmark_hashes(
|
|
|
70
88
|
stop_time = start_time + max_duration
|
|
71
89
|
next_update = start_time + 2
|
|
72
90
|
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
91
|
+
with PrintTimingContextManager('Filesystem scan completed in'):
|
|
92
|
+
for dir_entry in iter_scandir_files(path=base_path, excludes=set(excludes)):
|
|
93
|
+
entry_stat = dir_entry.stat()
|
|
94
|
+
file_size = entry_stat.st_size
|
|
95
|
+
if not (min_file_size <= file_size <= max_file_size):
|
|
96
|
+
continue
|
|
78
97
|
|
|
79
|
-
start_time = time.perf_counter()
|
|
80
|
-
file_content = Path(dir_entry.path).read_bytes()
|
|
81
|
-
duration = time.perf_counter() - start_time
|
|
82
|
-
total_read_time += duration
|
|
83
|
-
|
|
84
|
-
for algo in algorithms:
|
|
85
|
-
# Actual measurement:
|
|
86
98
|
start_time = time.perf_counter()
|
|
87
|
-
|
|
99
|
+
file_content = Path(dir_entry.path).read_bytes()
|
|
88
100
|
duration = time.perf_counter() - start_time
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
101
|
+
total_read_time += duration
|
|
102
|
+
|
|
103
|
+
for algo in algorithms:
|
|
104
|
+
# Actual measurement:
|
|
105
|
+
start_time = time.perf_counter()
|
|
106
|
+
hashlib.new(algo, file_content)
|
|
107
|
+
duration = time.perf_counter() - start_time
|
|
108
|
+
|
|
109
|
+
results[algo].add(duration)
|
|
110
|
+
|
|
111
|
+
file_count += 1
|
|
112
|
+
total_size += entry_stat.st_size
|
|
113
|
+
|
|
114
|
+
now = time.time()
|
|
115
|
+
if now >= stop_time:
|
|
116
|
+
print('Reached max duration limit, stopping benchmark...')
|
|
117
|
+
break
|
|
118
|
+
|
|
119
|
+
if now >= next_update:
|
|
120
|
+
percent = (now - (stop_time - max_duration)) / max_duration * 100
|
|
121
|
+
print(
|
|
122
|
+
f'{int(percent)}% Processed {file_count} files so far,'
|
|
123
|
+
f' total size: {total_size / 1024 / 1024:.1f} MiB...'
|
|
124
|
+
)
|
|
125
|
+
next_update = now + 2
|
|
107
126
|
|
|
108
127
|
print(f'\nTotal files hashed: {file_count}, total size: {total_size / 1024 / 1024:.1f} MiB')
|
|
109
128
|
|
|
@@ -0,0 +1,124 @@
|
|
|
1
|
+
import logging
|
|
2
|
+
import sys
|
|
3
|
+
from pathlib import Path
|
|
4
|
+
from typing import Annotated, Literal
|
|
5
|
+
|
|
6
|
+
import tyro
|
|
7
|
+
from bx_py_utils.path import assert_is_dir
|
|
8
|
+
from rich import (
|
|
9
|
+
get_console,
|
|
10
|
+
print, # noqa
|
|
11
|
+
)
|
|
12
|
+
from rich.logging import RichHandler
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
logger = logging.getLogger(__name__)
|
|
16
|
+
|
|
17
|
+
LogLevelLiteral = Literal['debug', 'info', 'warning', 'error']
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
TyroConsoleLogLevelArgType = Annotated[
|
|
21
|
+
LogLevelLiteral,
|
|
22
|
+
tyro.conf.arg(
|
|
23
|
+
help='Log level for console logging.',
|
|
24
|
+
),
|
|
25
|
+
]
|
|
26
|
+
DEFAULT_CONSOLE_LOG_LEVEL: TyroConsoleLogLevelArgType = 'warning'
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
TyroLogFileLevelArgType = Annotated[
|
|
30
|
+
LogLevelLiteral,
|
|
31
|
+
tyro.conf.arg(
|
|
32
|
+
help='Log level for the log file',
|
|
33
|
+
),
|
|
34
|
+
]
|
|
35
|
+
DEFAULT_LOG_FILE_LEVEL: TyroLogFileLevelArgType = 'info'
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
def log_level_name2int(level_name: str) -> int:
|
|
39
|
+
level_name = level_name.upper()
|
|
40
|
+
level_mapping = logging.getLevelNamesMapping()
|
|
41
|
+
try:
|
|
42
|
+
return level_mapping[level_name]
|
|
43
|
+
except KeyError as err:
|
|
44
|
+
raise ValueError(f'Invalid log level name: {level_name}') from err
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
console = get_console()
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
class LoggingManager:
    """
    Set up console logging on creation and, on request, an additional log file.

    Creating an instance reconfigures the root logger (``force=True``) with a
    Rich console handler and installs ``log_unhandled_exception`` as
    ``sys.excepthook``.
    """

    def __init__(
        self,
        *,
        console_level: TyroConsoleLogLevelArgType,
        file_level: TyroLogFileLevelArgType,
    ):
        """
        :param console_level: Log level *name* used for the console handler.
        :param file_level: Log level *name* used for the log file handler.
        :raises ValueError: If one of the level names is unknown.
        """
        self.console_level_name = console_level
        self.console_level: int = log_level_name2int(console_level)
        self.file_level_name = file_level
        self.file_level: int = log_level_name2int(file_level)

        # The root logger must let everything through that any handler wants to see:
        self.lowest_level = min(self.console_level, self.file_level)

        # Compare the numeric level: `console_level` is the level name (a string)
        # and would never be equal to the integer logging.DEBUG.
        if self.console_level == logging.DEBUG:
            log_format = '(%(name)s) %(message)s'
        else:
            log_format = '%(message)s'

        console.print(
            f'(Set [bold]console[/bold] log level: [cyan]{self.console_level_name}[/cyan])',
            justify='right',
        )
        handler = RichHandler(console=console, omit_repeated_times=False)
        handler.setLevel(self.console_level)
        logging.basicConfig(
            level=self.lowest_level,
            format=log_format,
            datefmt='[%x %X.%f]',
            handlers=[handler],
            force=True,
        )
        sys.excepthook = self.log_unhandled_exception

    def start_file_logging(self, log_file: Path):
        """
        Attach a file handler for ``log_file`` to the root logger.

        The parent directory of ``log_file`` must already exist
        (checked via ``assert_is_dir``). The file is opened in append mode.
        """
        console.print(
            f'(initialize log file [bold]{log_file}[/bold] with level: [cyan]{self.file_level_name}[/cyan])',
            justify='right',
        )

        assert_is_dir(log_file.parent)

        root_logger = logging.getLogger()

        file_handler = logging.FileHandler(log_file, mode='a', encoding='utf-8')
        file_handler.setLevel(self.file_level)

        formatter = logging.Formatter(
            '%(asctime)s - %(name)s - %(levelname)s - %(message)s',
            datefmt='%Y-%m-%d %H:%M:%S',
        )
        file_handler.setFormatter(formatter)

        root_logger.addHandler(file_handler)

    def log_unhandled_exception(self, exc_type, exc_value, exc_traceback):
        """
        ``sys.excepthook`` replacement: send unhandled exceptions to the logger.

        A KeyboardInterrupt is only noted on info level and then passed to the
        default hook; everything else is logged with its traceback.
        """
        if issubclass(exc_type, KeyboardInterrupt):
            logger.info('Program interrupted by user (KeyboardInterrupt). Exiting...')
            sys.__excepthook__(exc_type, exc_value, exc_traceback)
        else:
            logger.exception(
                'Unhandled exception occurred:',
                exc_info=(exc_type, exc_value, exc_traceback),
            )
|
|
114
|
+
|
|
115
|
+
|
|
116
|
+
class NoopLoggingManager(LoggingManager):
    """
    Test helper: a LoggingManager stand-in without any effect.

    Accepts arbitrary constructor arguments and ignores them.
    """

    def __init__(self, *args, **kwargs):
        # Intentionally does not call LoggingManager.__init__()
        return None

    def start_file_logging(self, log_file: Path):
        # Intentionally no file handler is created.
        return None
|