PyHardLinkBackup 1.0.0__tar.gz → 1.0.0rc0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {pyhardlinkbackup-1.0.0 → pyhardlinkbackup-1.0.0rc0}/PKG-INFO +62 -83
- {pyhardlinkbackup-1.0.0 → pyhardlinkbackup-1.0.0rc0}/PyHardLinkBackup/__init__.py +3 -2
- {pyhardlinkbackup-1.0.0 → pyhardlinkbackup-1.0.0rc0}/PyHardLinkBackup/backup.py +8 -20
- {pyhardlinkbackup-1.0.0/PyHardLinkBackup/cli_dev → pyhardlinkbackup-1.0.0rc0/PyHardLinkBackup/cli_app}/benchmark.py +24 -23
- {pyhardlinkbackup-1.0.0 → pyhardlinkbackup-1.0.0rc0}/PyHardLinkBackup/cli_app/phlb.py +1 -1
- pyhardlinkbackup-1.0.0rc0/PyHardLinkBackup/tests/test_backup.py +188 -0
- {pyhardlinkbackup-1.0.0 → pyhardlinkbackup-1.0.0rc0}/PyHardLinkBackup/tests/test_readme.py +0 -9
- {pyhardlinkbackup-1.0.0 → pyhardlinkbackup-1.0.0rc0}/PyHardLinkBackup/utilities/file_hash_database.py +2 -7
- {pyhardlinkbackup-1.0.0 → pyhardlinkbackup-1.0.0rc0}/PyHardLinkBackup/utilities/file_size_database.py +10 -16
- {pyhardlinkbackup-1.0.0 → pyhardlinkbackup-1.0.0rc0}/PyHardLinkBackup/utilities/filesystem.py +9 -20
- pyhardlinkbackup-1.0.0rc0/PyHardLinkBackup/utilities/tests/test_file_hash_database.py +68 -0
- {pyhardlinkbackup-1.0.0 → pyhardlinkbackup-1.0.0rc0}/PyHardLinkBackup/utilities/tests/test_file_size_database.py +0 -12
- {pyhardlinkbackup-1.0.0 → pyhardlinkbackup-1.0.0rc0}/PyHardLinkBackup/utilities/tests/test_filesystem.py +2 -6
- {pyhardlinkbackup-1.0.0 → pyhardlinkbackup-1.0.0rc0}/README.md +61 -82
- {pyhardlinkbackup-1.0.0 → pyhardlinkbackup-1.0.0rc0}/cli.py +1 -1
- {pyhardlinkbackup-1.0.0 → pyhardlinkbackup-1.0.0rc0}/dev-cli.py +1 -1
- {pyhardlinkbackup-1.0.0 → pyhardlinkbackup-1.0.0rc0}/pyproject.toml +2 -11
- {pyhardlinkbackup-1.0.0 → pyhardlinkbackup-1.0.0rc0}/uv.lock +6 -17
- pyhardlinkbackup-1.0.0/.run/only DocWrite.run.xml +0 -24
- pyhardlinkbackup-1.0.0/PyHardLinkBackup/tests/test_backup.py +0 -399
- pyhardlinkbackup-1.0.0/PyHardLinkBackup/tests/test_doc_write.py +0 -25
- pyhardlinkbackup-1.0.0/PyHardLinkBackup/utilities/tests/test_file_hash_database.py +0 -134
- pyhardlinkbackup-1.0.0/docs/README.md +0 -57
- pyhardlinkbackup-1.0.0/docs/about-docs.md +0 -8
- {pyhardlinkbackup-1.0.0 → pyhardlinkbackup-1.0.0rc0}/.editorconfig +0 -0
- {pyhardlinkbackup-1.0.0 → pyhardlinkbackup-1.0.0rc0}/.github/workflows/tests.yml +0 -0
- {pyhardlinkbackup-1.0.0 → pyhardlinkbackup-1.0.0rc0}/.gitignore +0 -0
- {pyhardlinkbackup-1.0.0 → pyhardlinkbackup-1.0.0rc0}/.idea/.gitignore +0 -0
- {pyhardlinkbackup-1.0.0 → pyhardlinkbackup-1.0.0rc0}/.pre-commit-config.yaml +0 -0
- {pyhardlinkbackup-1.0.0 → pyhardlinkbackup-1.0.0rc0}/.pre-commit-hooks.yaml +0 -0
- {pyhardlinkbackup-1.0.0 → pyhardlinkbackup-1.0.0rc0}/.run/Template Python tests.run.xml +0 -0
- {pyhardlinkbackup-1.0.0 → pyhardlinkbackup-1.0.0rc0}/.run/Unittests - __all__.run.xml +0 -0
- {pyhardlinkbackup-1.0.0 → pyhardlinkbackup-1.0.0rc0}/.run/cli.py --help.run.xml +0 -0
- {pyhardlinkbackup-1.0.0 → pyhardlinkbackup-1.0.0rc0}/.run/dev-cli update.run.xml +0 -0
- {pyhardlinkbackup-1.0.0 → pyhardlinkbackup-1.0.0rc0}/.run/only DocTests.run.xml +0 -0
- {pyhardlinkbackup-1.0.0 → pyhardlinkbackup-1.0.0rc0}/.venv-app/lib/python3.12/site-packages/cli_base/tests/shell_complete_snapshots/.gitignore +0 -0
- {pyhardlinkbackup-1.0.0 → pyhardlinkbackup-1.0.0rc0}/PyHardLinkBackup/__main__.py +0 -0
- {pyhardlinkbackup-1.0.0 → pyhardlinkbackup-1.0.0rc0}/PyHardLinkBackup/cli_app/__init__.py +0 -0
- {pyhardlinkbackup-1.0.0 → pyhardlinkbackup-1.0.0rc0}/PyHardLinkBackup/cli_dev/__init__.py +0 -0
- {pyhardlinkbackup-1.0.0 → pyhardlinkbackup-1.0.0rc0}/PyHardLinkBackup/cli_dev/code_style.py +0 -0
- {pyhardlinkbackup-1.0.0 → pyhardlinkbackup-1.0.0rc0}/PyHardLinkBackup/cli_dev/packaging.py +0 -0
- {pyhardlinkbackup-1.0.0 → pyhardlinkbackup-1.0.0rc0}/PyHardLinkBackup/cli_dev/shell_completion.py +0 -0
- {pyhardlinkbackup-1.0.0 → pyhardlinkbackup-1.0.0rc0}/PyHardLinkBackup/cli_dev/testing.py +0 -0
- {pyhardlinkbackup-1.0.0 → pyhardlinkbackup-1.0.0rc0}/PyHardLinkBackup/cli_dev/update_readme_history.py +0 -0
- {pyhardlinkbackup-1.0.0 → pyhardlinkbackup-1.0.0rc0}/PyHardLinkBackup/constants.py +0 -0
- {pyhardlinkbackup-1.0.0 → pyhardlinkbackup-1.0.0rc0}/PyHardLinkBackup/tests/__init__.py +0 -0
- {pyhardlinkbackup-1.0.0 → pyhardlinkbackup-1.0.0rc0}/PyHardLinkBackup/tests/test_doctests.py +0 -0
- {pyhardlinkbackup-1.0.0 → pyhardlinkbackup-1.0.0rc0}/PyHardLinkBackup/tests/test_project_setup.py +0 -0
- {pyhardlinkbackup-1.0.0 → pyhardlinkbackup-1.0.0rc0}/PyHardLinkBackup/tests/test_readme_history.py +0 -0
- {pyhardlinkbackup-1.0.0 → pyhardlinkbackup-1.0.0rc0}/PyHardLinkBackup/utilities/__init__.py +0 -0
- {pyhardlinkbackup-1.0.0 → pyhardlinkbackup-1.0.0rc0}/PyHardLinkBackup/utilities/humanize.py +0 -0
- {pyhardlinkbackup-1.0.0 → pyhardlinkbackup-1.0.0rc0}/PyHardLinkBackup/utilities/rich_utils.py +0 -0
- {pyhardlinkbackup-1.0.0 → pyhardlinkbackup-1.0.0rc0}/PyHardLinkBackup/utilities/tests/__init__.py +0 -0
- {pyhardlinkbackup-1.0.0 → pyhardlinkbackup-1.0.0rc0}/dist/.gitignore +0 -0
- {pyhardlinkbackup-1.0.0 → pyhardlinkbackup-1.0.0rc0}/noxfile.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: PyHardLinkBackup
|
|
3
|
-
Version: 1.0.0
|
|
3
|
+
Version: 1.0.0rc0
|
|
4
4
|
Summary: HardLink/Deduplication Backups with Python
|
|
5
5
|
Project-URL: Documentation, https://github.com/jedie/PyHardLinkBackup
|
|
6
6
|
Project-URL: Source, https://github.com/jedie/PyHardLinkBackup
|
|
@@ -23,71 +23,7 @@ Description-Content-Type: text/markdown
|
|
|
23
23
|
|
|
24
24
|
HardLink/Deduplication Backups with Python
|
|
25
25
|
|
|
26
|
-
**WIP:** v1.0.0 is a complete rewrite of PyHardLinkBackup.
|
|
27
|
-
|
|
28
|
-
## installation
|
|
29
|
-
|
|
30
|
-
You can use [pipx](https://pipx.pypa.io/stable/installation/) to install and use PyHardLinkBackup, e.g.:
|
|
31
|
-
|
|
32
|
-
```bash
|
|
33
|
-
sudo apt install pipx
|
|
34
|
-
|
|
35
|
-
pipx install PyHardLinkBackup
|
|
36
|
-
```
|
|
37
|
-
|
|
38
|
-
After this you can call the CLI via `phlb` command.
|
|
39
|
-
The main command is `phlb backup <source> <destination>`:
|
|
40
|
-
|
|
41
|
-
[comment]: <> (✂✂✂ auto generated backup help start ✂✂✂)
|
|
42
|
-
```
|
|
43
|
-
usage: phlb backup [-h] source destination [--excludes [STR [STR ...]]] [-v]
|
|
44
|
-
|
|
45
|
-
Backup the source directory to the destination directory using hard links for deduplication.
|
|
46
|
-
|
|
47
|
-
╭─ positional arguments ───────────────────────────────────────────────────────────────────────────────────────────────╮
|
|
48
|
-
│ source Source directory to back up. (required) │
|
|
49
|
-
│ destination Destination directory for the backup. (required) │
|
|
50
|
-
╰──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯
|
|
51
|
-
╭─ options ────────────────────────────────────────────────────────────────────────────────────────────────────────────╮
|
|
52
|
-
│ -h, --help show this help message and exit │
|
|
53
|
-
│ --excludes [STR [STR ...]] │
|
|
54
|
-
│ List of directory or file names to exclude from backup. (default: __pycache__ .cache .temp .tmp .tox │
|
|
55
|
-
│ .nox) │
|
|
56
|
-
│ -v, --verbosity │
|
|
57
|
-
│ Verbosity level; e.g.: -v, -vv, -vvv, etc. (repeatable) │
|
|
58
|
-
╰──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯
|
|
59
|
-
```
|
|
60
|
-
[comment]: <> (✂✂✂ auto generated backup help end ✂✂✂)
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
complete help for main CLI app:
|
|
65
|
-
|
|
66
|
-
[comment]: <> (✂✂✂ auto generated main help start ✂✂✂)
|
|
67
|
-
```
|
|
68
|
-
usage: phlb [-h] {backup,version}
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
╭─ options ─────────────────────────────────────────────────────────────────────────────────────────────────╮
|
|
73
|
-
│ -h, --help show this help message and exit │
|
|
74
|
-
╰───────────────────────────────────────────────────────────────────────────────────────────────────────────╯
|
|
75
|
-
╭─ subcommands ─────────────────────────────────────────────────────────────────────────────────────────────╮
|
|
76
|
-
│ (required) │
|
|
77
|
-
│ • backup Backup the source directory to the destination directory using hard links for deduplication. │
|
|
78
|
-
│ • version Print version and exit │
|
|
79
|
-
╰───────────────────────────────────────────────────────────────────────────────────────────────────────────╯
|
|
80
|
-
```
|
|
81
|
-
[comment]: <> (✂✂✂ auto generated main help end ✂✂✂)
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
### update
|
|
85
|
-
|
|
86
|
-
If you use pipx, just call:
|
|
87
|
-
```bash
|
|
88
|
-
pipx upgrade PyHardLinkBackup
|
|
89
|
-
```
|
|
90
|
-
see: https://pipx.pypa.io/stable/docs/#pipx-upgrade
|
|
26
|
+
**WIP:** v1.0.0 is a complete rewrite of PyHardLinkBackup. The new version is not usable, yet!
|
|
91
27
|
|
|
92
28
|
|
|
93
29
|
## concept
|
|
@@ -147,18 +83,70 @@ e.g.: hash like `abcdef123...` stored in: `{destination}/.phlb/hash-lookup/ab/cd
|
|
|
147
83
|
The file contains only the relative path to the first hardlink of this file content.
|
|
148
84
|
|
|
149
85
|
|
|
150
|
-
##
|
|
86
|
+
## CLI - backup command
|
|
87
|
+
|
|
88
|
+
The main command is `backup`:
|
|
89
|
+
|
|
90
|
+
[comment]: <> (✂✂✂ auto generated backup help start ✂✂✂)
|
|
91
|
+
```
|
|
92
|
+
usage: ./cli.py backup [-h] source destination [--excludes STR|{[STR [STR ...]]}] [-v]
|
|
93
|
+
|
|
94
|
+
Backup the source directory to the destination directory using hard links for deduplication.
|
|
151
95
|
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
|
|
96
|
+
╭─ positional arguments ───────────────────────────────────────────────────────────────────────────────────────────────╮
|
|
97
|
+
│ source Source directory to back up. (required) │
|
|
98
|
+
│ destination Destination directory for the backup. (required) │
|
|
99
|
+
╰──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯
|
|
100
|
+
╭─ options ────────────────────────────────────────────────────────────────────────────────────────────────────────────╮
|
|
101
|
+
│ -h, --help show this help message and exit │
|
|
102
|
+
│ --excludes STR|{[STR [STR ...]]} │
|
|
103
|
+
│ List of directory or file names to exclude from backup. (default: __pycache__ .cache .temp .tmp .tox │
|
|
104
|
+
│ .nox) │
|
|
105
|
+
│ -v, --verbosity │
|
|
106
|
+
│ Verbosity level; e.g.: -v, -vv, -vvv, etc. (repeatable) │
|
|
107
|
+
╰──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯
|
|
157
108
|
```
|
|
109
|
+
[comment]: <> (✂✂✂ auto generated backup help end ✂✂✂)
|
|
110
|
+
|
|
111
|
+
|
|
112
|
+
## CLI - main app help
|
|
113
|
+
|
|
114
|
+
[comment]: <> (✂✂✂ auto generated main help start ✂✂✂)
|
|
115
|
+
```
|
|
116
|
+
usage: ./cli.py [-h] {backup,benchmark-hashes,version}
|
|
117
|
+
|
|
118
|
+
|
|
119
|
+
|
|
120
|
+
╭─ options ────────────────────────────────────────────────────────────────────────────────────────────────────────────╮
|
|
121
|
+
│ -h, --help show this help message and exit │
|
|
122
|
+
╰──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯
|
|
123
|
+
╭─ subcommands ────────────────────────────────────────────────────────────────────────────────────────────────────────╮
|
|
124
|
+
│ (required) │
|
|
125
|
+
│ • backup Backup the source directory to the destination directory using hard links for deduplication. │
|
|
126
|
+
│ • benchmark-hashes Benchmark different file hashing algorithms on the given path Example output: │
|
|
127
|
+
│ │
|
|
128
|
+
│ Total files hashed: 220, total size: 1187.7 MiB │
|
|
129
|
+
│ │
|
|
130
|
+
│ Results: Total file content read time: 1.7817s │
|
|
131
|
+
│ │
|
|
132
|
+
│ sha1 | Total: 0.6827s | 0.4x hash/read sha256 | Total: 0.7189s | 0.4x hash/read │
|
|
133
|
+
│ sha224 | Total: 0.7375s | 0.4x hash/read sha384 | Total: 1.6552s | 0.9x hash/read │
|
|
134
|
+
│ blake2b | Total: 1.6708s | 0.9x hash/read md5 | Total: 1.6870s | 0.9x hash/read │
|
|
135
|
+
│ sha512 | Total: 1.7269s | 1.0x hash/read shake_128 | Total: 1.9834s | 1.1x hash/read │
|
|
136
|
+
│ sha3_224 | Total: 2.3006s | 1.3x hash/read sha3_256 | Total: 2.3856s | 1.3x hash/read │
|
|
137
|
+
│ shake_256 | Total: 2.4375s | 1.4x hash/read blake2s | Total: 2.5219s | 1.4x hash/read │
|
|
138
|
+
│ sha3_384 | Total: 3.2596s | 1.8x hash/read sha3_512 | Total: 4.5328s | 2.5x hash/read │
|
|
139
|
+
│ • version Print version and exit │
|
|
140
|
+
╰──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯
|
|
141
|
+
```
|
|
142
|
+
[comment]: <> (✂✂✂ auto generated main help end ✂✂✂)
|
|
143
|
+
|
|
144
|
+
|
|
145
|
+
## dev CLI
|
|
158
146
|
|
|
159
147
|
[comment]: <> (✂✂✂ auto generated dev help start ✂✂✂)
|
|
160
148
|
```
|
|
161
|
-
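usage: ./dev-cli.py [-h] {benchmark-hashes,coverage,install,lint,mypy,nox,pip-audit,publish,shell-completion,test,update,update-readme-history,update-test-snapshot-files,version}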
|
|
149
|
+
usage: ./dev-cli.py [-h] {coverage,install,lint,mypy,nox,pip-audit,publish,shell-completion,test,update,update-readme-history,update-test-snapshot-files,version}
|
|
162
150
|
|
|
163
151
|
|
|
164
152
|
|
|
@@ -167,8 +155,6 @@ usage: ./dev-cli.py [-h] {benchmark-hashes,coverage,install,lint,mypy,nox,pip-au
|
|
|
167
155
|
╰──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯
|
|
168
156
|
╭─ subcommands ────────────────────────────────────────────────────────────────────────────────────────────────────────╮
|
|
169
157
|
│ (required) │
|
|
170
|
-
│ • benchmark-hashes │
|
|
171
|
-
│ Benchmark different file hashing algorithms on the given path. │
|
|
172
158
|
│ • coverage Run tests and show coverage report. │
|
|
173
159
|
│ • install Install requirements and 'PyHardLinkBackup' via pip as editable. │
|
|
174
160
|
│ • lint Check/fix code style by run: "ruff check --fix" │
|
|
@@ -204,14 +190,7 @@ v1 is a complete rewrite of PyHardLinkBackup.
|
|
|
204
190
|
|
|
205
191
|
[comment]: <> (✂✂✂ auto generated history start ✂✂✂)
|
|
206
192
|
|
|
207
|
-
* [v1.0.0](https://github.com/jedie/PyHardLinkBackup/compare/v0.13.0...v1.0.0)
|
|
208
|
-
* 2026-01-13 - Change "./cli.py" to "phlb" (because it's the name installed via pipx)
|
|
209
|
-
* 2026-01-13 - Update README
|
|
210
|
-
* 2026-01-13 - Fix benchmark moved to dev CLI ;)
|
|
211
|
-
* 2026-01-13 - Remove tyro warning
|
|
212
|
-
* 2026-01-13 - Move "benchmark_hashes" from app to dev cli (It's more for testing)
|
|
213
|
-
* 2026-01-13 - Rename [project.scripts] hooks
|
|
214
|
-
* 2026-01-13 - Add DocWrite, handle broken symlinks, keep file meta, handle missing hardlink sources
|
|
193
|
+
* [v1.0.0rc0](https://github.com/jedie/PyHardLinkBackup/compare/v0.13.0...v1.0.0rc0)
|
|
215
194
|
* 2026-01-12 - First working iteration with rich progess bar
|
|
216
195
|
* 2026-01-08 - Rewrite everything
|
|
217
196
|
* [v0.13.0](https://github.com/jedie/PyHardLinkBackup/compare/v0.12.3...v0.13.0)
|
|
@@ -1,7 +1,8 @@
|
|
|
1
|
-
"""
|
|
1
|
+
"""
|
|
2
|
+
PyHardLinkBackup
|
|
2
3
|
HardLink/Deduplication Backups with Python
|
|
3
4
|
"""
|
|
4
5
|
|
|
5
6
|
# See https://packaging.python.org/en/latest/specifications/version-specifiers/
|
|
6
|
-
__version__ = '1.0.0'
|
|
7
|
+
__version__ = '1.0.0rc0'
|
|
7
8
|
__author__ = 'Jens Diemer <PyHardLinkBackup@jensdiemer.de>'
|
|
@@ -84,22 +84,7 @@ def backup_tree(*, src_root: Path, backup_root: Path, excludes: set[str]) -> Bac
|
|
|
84
84
|
next_update = 0
|
|
85
85
|
for entry in iter_scandir_files(src_root, excludes=excludes):
|
|
86
86
|
backup_count += 1
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
dst_path = backup_dir / src_path.relative_to(src_root)
|
|
90
|
-
dst_dir_path = dst_path.parent
|
|
91
|
-
if not dst_dir_path.exists():
|
|
92
|
-
dst_dir_path.mkdir(parents=True, exist_ok=False)
|
|
93
|
-
|
|
94
|
-
try:
|
|
95
|
-
size = entry.stat().st_size
|
|
96
|
-
except FileNotFoundError:
|
|
97
|
-
# e.g.: Handle broken symlink
|
|
98
|
-
target = os.readlink(src_path)
|
|
99
|
-
dst_path.symlink_to(target)
|
|
100
|
-
symlink_files += 1
|
|
101
|
-
continue
|
|
102
|
-
|
|
87
|
+
size = entry.stat().st_size
|
|
103
88
|
backup_size += size
|
|
104
89
|
|
|
105
90
|
now = time.monotonic()
|
|
@@ -107,8 +92,14 @@ def backup_tree(*, src_root: Path, backup_root: Path, excludes: set[str]) -> Bac
|
|
|
107
92
|
progress.update(backup_count=backup_count, backup_size=backup_size)
|
|
108
93
|
next_update = now + 0.5
|
|
109
94
|
|
|
95
|
+
src_path = Path(entry.path)
|
|
96
|
+
dst_path = backup_dir / src_path.relative_to(src_root)
|
|
97
|
+
|
|
98
|
+
dst_path.parent.mkdir(parents=True, exist_ok=True)
|
|
99
|
+
|
|
110
100
|
if entry.is_symlink():
|
|
111
101
|
logger.debug('Copy symlink: %s to %s', src_path, dst_path)
|
|
102
|
+
# Copy symlinks as-is
|
|
112
103
|
target = os.readlink(src_path)
|
|
113
104
|
dst_path.symlink_to(target)
|
|
114
105
|
symlink_files += 1
|
|
@@ -161,12 +152,10 @@ def backup_tree(*, src_root: Path, backup_root: Path, excludes: set[str]) -> Bac
|
|
|
161
152
|
hardlinked_size += size
|
|
162
153
|
else:
|
|
163
154
|
logger.info('Copy unique file: %s to %s', src_path, dst_path)
|
|
155
|
+
shutil.copy2(src_path, dst_path)
|
|
164
156
|
hash_db[file_hash] = dst_path
|
|
165
157
|
copied_files += 1
|
|
166
158
|
copied_size += size
|
|
167
|
-
|
|
168
|
-
# Keep original file metadata (permission bits, time stamps, and flags)
|
|
169
|
-
shutil.copy2(src_path, dst_path)
|
|
170
159
|
else:
|
|
171
160
|
# A file with this size not backuped before -> Can't be duplicate -> copy and hash
|
|
172
161
|
file_hash = copy_and_hash(src_path, dst_path)
|
|
@@ -175,7 +164,6 @@ def backup_tree(*, src_root: Path, backup_root: Path, excludes: set[str]) -> Bac
|
|
|
175
164
|
copied_files += 1
|
|
176
165
|
copied_size += size
|
|
177
166
|
|
|
178
|
-
# Finalize progress indicator values:
|
|
179
167
|
progress.update(backup_count=backup_count, backup_size=backup_size)
|
|
180
168
|
|
|
181
169
|
print(f'\nBackup complete: {backup_dir} (total size {human_filesize(backup_size)})\n')
|
|
@@ -9,7 +9,7 @@ from cli_base.cli_tools.verbosity import setup_logging
|
|
|
9
9
|
from cli_base.tyro_commands import TyroVerbosityArgType
|
|
10
10
|
from rich import print # noqa
|
|
11
11
|
|
|
12
|
-
from PyHardLinkBackup.cli_dev import app
|
|
12
|
+
from PyHardLinkBackup.cli_app import app
|
|
13
13
|
from PyHardLinkBackup.utilities.filesystem import iter_scandir_files
|
|
14
14
|
|
|
15
15
|
|
|
@@ -26,29 +26,30 @@ def benchmark_hashes(
|
|
|
26
26
|
verbosity: TyroVerbosityArgType = 1,
|
|
27
27
|
) -> None:
|
|
28
28
|
"""
|
|
29
|
-
Benchmark different file hashing algorithms on the given path.
|
|
29
|
+
Benchmark different file hashing algorithms on the given path
|
|
30
|
+
|
|
31
|
+
Example output:
|
|
32
|
+
|
|
33
|
+
Total files hashed: 220, total size: 1187.7 MiB
|
|
34
|
+
|
|
35
|
+
Results:
|
|
36
|
+
Total file content read time: 1.7817s
|
|
37
|
+
|
|
38
|
+
sha1 | Total: 0.6827s | 0.4x hash/read
|
|
39
|
+
sha256 | Total: 0.7189s | 0.4x hash/read
|
|
40
|
+
sha224 | Total: 0.7375s | 0.4x hash/read
|
|
41
|
+
sha384 | Total: 1.6552s | 0.9x hash/read
|
|
42
|
+
blake2b | Total: 1.6708s | 0.9x hash/read
|
|
43
|
+
md5 | Total: 1.6870s | 0.9x hash/read
|
|
44
|
+
sha512 | Total: 1.7269s | 1.0x hash/read
|
|
45
|
+
shake_128 | Total: 1.9834s | 1.1x hash/read
|
|
46
|
+
sha3_224 | Total: 2.3006s | 1.3x hash/read
|
|
47
|
+
sha3_256 | Total: 2.3856s | 1.3x hash/read
|
|
48
|
+
shake_256 | Total: 2.4375s | 1.4x hash/read
|
|
49
|
+
blake2s | Total: 2.5219s | 1.4x hash/read
|
|
50
|
+
sha3_384 | Total: 3.2596s | 1.8x hash/read
|
|
51
|
+
sha3_512 | Total: 4.5328s | 2.5x hash/read
|
|
30
52
|
"""
|
|
31
|
-
# Example output:
|
|
32
|
-
#
|
|
33
|
-
# Total files hashed: 220, total size: 1187.7 MiB
|
|
34
|
-
#
|
|
35
|
-
# Results:
|
|
36
|
-
# Total file content read time: 1.7817s
|
|
37
|
-
#
|
|
38
|
-
# sha1 | Total: 0.6827s | 0.4x hash/read
|
|
39
|
-
# sha256 | Total: 0.7189s | 0.4x hash/read
|
|
40
|
-
# sha224 | Total: 0.7375s | 0.4x hash/read
|
|
41
|
-
# sha384 | Total: 1.6552s | 0.9x hash/read
|
|
42
|
-
# blake2b | Total: 1.6708s | 0.9x hash/read
|
|
43
|
-
# md5 | Total: 1.6870s | 0.9x hash/read
|
|
44
|
-
# sha512 | Total: 1.7269s | 1.0x hash/read
|
|
45
|
-
# shake_128 | Total: 1.9834s | 1.1x hash/read
|
|
46
|
-
# sha3_224 | Total: 2.3006s | 1.3x hash/read
|
|
47
|
-
# sha3_256 | Total: 2.3856s | 1.3x hash/read
|
|
48
|
-
# shake_256 | Total: 2.4375s | 1.4x hash/read
|
|
49
|
-
# blake2s | Total: 2.5219s | 1.4x hash/read
|
|
50
|
-
# sha3_384 | Total: 3.2596s | 1.8x hash/read
|
|
51
|
-
# sha3_512 | Total: 4.5328s | 2.5x hash/read
|
|
52
53
|
setup_logging(verbosity=verbosity)
|
|
53
54
|
assert_is_dir(base_path)
|
|
54
55
|
print(f'Benchmarking file hashes under: {base_path}')
|
|
@@ -0,0 +1,188 @@
|
|
|
1
|
+
import os
|
|
2
|
+
import tempfile
|
|
3
|
+
import textwrap
|
|
4
|
+
import zlib
|
|
5
|
+
from pathlib import Path
|
|
6
|
+
from unittest import TestCase
|
|
7
|
+
|
|
8
|
+
from bx_py_utils.test_utils.assertion import assert_text_equal
|
|
9
|
+
from freezegun import freeze_time
|
|
10
|
+
|
|
11
|
+
from PyHardLinkBackup.backup import BackupResult, backup_tree
|
|
12
|
+
from PyHardLinkBackup.constants import CHUNK_SIZE
|
|
13
|
+
from PyHardLinkBackup.utilities.file_size_database import FileSizeDatabase
|
|
14
|
+
from PyHardLinkBackup.utilities.filesystem import iter_scandir_files
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
def fs_tree_overview(root: Path) -> str:
|
|
18
|
+
lines = []
|
|
19
|
+
for entry in iter_scandir_files(root, excludes=set()):
|
|
20
|
+
file_path = Path(entry.path)
|
|
21
|
+
crc32 = zlib.crc32(file_path.read_bytes())
|
|
22
|
+
rel_path = file_path.relative_to(root)
|
|
23
|
+
|
|
24
|
+
nlink = entry.stat().st_nlink
|
|
25
|
+
if entry.is_symlink():
|
|
26
|
+
file_type = 'symlink'
|
|
27
|
+
elif nlink > 1:
|
|
28
|
+
file_type = 'hardlink'
|
|
29
|
+
else:
|
|
30
|
+
file_type = 'file'
|
|
31
|
+
|
|
32
|
+
lines.append(
|
|
33
|
+
f'{str(rel_path):<20} | {file_type:<8} | {nlink=} | {entry.stat().st_size:>8} Bytes | crc32: {crc32:08x}'
|
|
34
|
+
)
|
|
35
|
+
return '\n'.join(sorted(lines))
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
def assert_fs_tree_overview(root: Path, expected_overview: str):
|
|
39
|
+
expected_overview = textwrap.dedent(expected_overview).strip()
|
|
40
|
+
actual_overview = fs_tree_overview(root)
|
|
41
|
+
assert_text_equal(
|
|
42
|
+
actual_overview,
|
|
43
|
+
expected_overview,
|
|
44
|
+
msg=f'Filesystem tree overview does not match expected overview.\n\n{actual_overview}\n\n',
|
|
45
|
+
)
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
class BackupTreeTestCase(TestCase):
|
|
49
|
+
def test_happy_path(self):
|
|
50
|
+
with tempfile.TemporaryDirectory() as temp_dir:
|
|
51
|
+
temp_path = Path(temp_dir)
|
|
52
|
+
|
|
53
|
+
src_root = temp_path / 'source'
|
|
54
|
+
backup_root = temp_path / 'backup'
|
|
55
|
+
|
|
56
|
+
src_root.mkdir()
|
|
57
|
+
backup_root.mkdir()
|
|
58
|
+
|
|
59
|
+
file1_path = src_root / 'file2.txt'
|
|
60
|
+
file1_path.write_text('This is file 1')
|
|
61
|
+
|
|
62
|
+
(src_root / 'symlink2file1').symlink_to(file1_path)
|
|
63
|
+
os.link(file1_path, src_root / 'hardlink2file1')
|
|
64
|
+
|
|
65
|
+
sub_dir = src_root / 'subdir'
|
|
66
|
+
sub_dir.mkdir()
|
|
67
|
+
(sub_dir / 'file.txt').write_text('This is file in subdir')
|
|
68
|
+
|
|
69
|
+
# Only files bigger than MIN_SIZE will be considered for hardlinking:
|
|
70
|
+
size_db_min_file = src_root / 'min_sized_file1.bin'
|
|
71
|
+
size_db_min_file.write_bytes(b'X' * FileSizeDatabase.MIN_SIZE)
|
|
72
|
+
|
|
73
|
+
# Same content and big enough to be considered for hardlinking:
|
|
74
|
+
size_db_min_file = src_root / 'min_sized_file2.bin'
|
|
75
|
+
size_db_min_file.write_bytes(b'X' * FileSizeDatabase.MIN_SIZE)
|
|
76
|
+
|
|
77
|
+
# Larger then CHUNK_SIZE file will be handled differently:
|
|
78
|
+
large_file = src_root / 'large_file.bin'
|
|
79
|
+
large_file.write_bytes(b'Y' * (CHUNK_SIZE + 1))
|
|
80
|
+
|
|
81
|
+
excluded_dir = src_root / '.cache'
|
|
82
|
+
excluded_dir.mkdir()
|
|
83
|
+
(excluded_dir / 'tempfile.tmp').write_text('Temporary file that should be excluded')
|
|
84
|
+
|
|
85
|
+
#######################################################################################
|
|
86
|
+
# Create first backup:
|
|
87
|
+
|
|
88
|
+
with freeze_time('2026-01-01T12:34:56Z', auto_tick_seconds=0):
|
|
89
|
+
result = backup_tree(
|
|
90
|
+
src_root=src_root,
|
|
91
|
+
backup_root=backup_root,
|
|
92
|
+
excludes={'.cache'},
|
|
93
|
+
)
|
|
94
|
+
backup_dir = result.backup_dir
|
|
95
|
+
self.assertEqual(
|
|
96
|
+
str(Path(backup_dir).relative_to(temp_path)),
|
|
97
|
+
'backup/source/20260101_123456',
|
|
98
|
+
)
|
|
99
|
+
self.assertEqual(
|
|
100
|
+
result,
|
|
101
|
+
BackupResult(
|
|
102
|
+
backup_dir=backup_dir,
|
|
103
|
+
backup_count=7,
|
|
104
|
+
backup_size=67110929,
|
|
105
|
+
symlink_files=1,
|
|
106
|
+
hardlinked_files=1,
|
|
107
|
+
hardlinked_size=1000,
|
|
108
|
+
copied_files=5,
|
|
109
|
+
copied_size=67109915,
|
|
110
|
+
copied_small_files=3,
|
|
111
|
+
copied_small_size=50,
|
|
112
|
+
),
|
|
113
|
+
)
|
|
114
|
+
|
|
115
|
+
# The sources:
|
|
116
|
+
assert_fs_tree_overview(
|
|
117
|
+
root=src_root,
|
|
118
|
+
expected_overview="""
|
|
119
|
+
.cache/tempfile.tmp | file | nlink=1 | 38 Bytes | crc32: 41d7a2c9
|
|
120
|
+
file2.txt | hardlink | nlink=2 | 14 Bytes | crc32: 8a11514a
|
|
121
|
+
hardlink2file1 | hardlink | nlink=2 | 14 Bytes | crc32: 8a11514a
|
|
122
|
+
large_file.bin | file | nlink=1 | 67108865 Bytes | crc32: 9671eaac
|
|
123
|
+
min_sized_file1.bin | file | nlink=1 | 1000 Bytes | crc32: f0d93de4
|
|
124
|
+
min_sized_file2.bin | file | nlink=1 | 1000 Bytes | crc32: f0d93de4
|
|
125
|
+
subdir/file.txt | file | nlink=1 | 22 Bytes | crc32: c0167e63
|
|
126
|
+
symlink2file1 | symlink | nlink=2 | 14 Bytes | crc32: 8a11514a
|
|
127
|
+
""",
|
|
128
|
+
)
|
|
129
|
+
# The backup:
|
|
130
|
+
# * /.cache/ -> excluded
|
|
131
|
+
# * min_sized_file1.bin and min_sized_file2.bin -> hardlinked
|
|
132
|
+
assert_fs_tree_overview(
|
|
133
|
+
root=backup_dir,
|
|
134
|
+
expected_overview="""
|
|
135
|
+
file2.txt | file | nlink=1 | 14 Bytes | crc32: 8a11514a
|
|
136
|
+
hardlink2file1 | file | nlink=1 | 14 Bytes | crc32: 8a11514a
|
|
137
|
+
large_file.bin | file | nlink=1 | 67108865 Bytes | crc32: 9671eaac
|
|
138
|
+
min_sized_file1.bin | hardlink | nlink=2 | 1000 Bytes | crc32: f0d93de4
|
|
139
|
+
min_sized_file2.bin | hardlink | nlink=2 | 1000 Bytes | crc32: f0d93de4
|
|
140
|
+
subdir/file.txt | file | nlink=1 | 22 Bytes | crc32: c0167e63
|
|
141
|
+
symlink2file1 | symlink | nlink=2 | 14 Bytes | crc32: 8a11514a
|
|
142
|
+
""",
|
|
143
|
+
)
|
|
144
|
+
|
|
145
|
+
#######################################################################################
|
|
146
|
+
# Just backup again:
|
|
147
|
+
|
|
148
|
+
with freeze_time('2026-01-02T12:34:56Z', auto_tick_seconds=0):
|
|
149
|
+
result = backup_tree(
|
|
150
|
+
src_root=src_root,
|
|
151
|
+
backup_root=backup_root,
|
|
152
|
+
excludes={'.cache'},
|
|
153
|
+
)
|
|
154
|
+
backup_dir = result.backup_dir
|
|
155
|
+
self.assertEqual(
|
|
156
|
+
str(Path(backup_dir).relative_to(temp_path)),
|
|
157
|
+
'backup/source/20260102_123456',
|
|
158
|
+
)
|
|
159
|
+
self.assertEqual(
|
|
160
|
+
result,
|
|
161
|
+
BackupResult(
|
|
162
|
+
backup_dir=backup_dir,
|
|
163
|
+
backup_count=7,
|
|
164
|
+
backup_size=67110929,
|
|
165
|
+
symlink_files=1,
|
|
166
|
+
hardlinked_files=3, # <<< More hardlinks this time!
|
|
167
|
+
hardlinked_size=67110865,
|
|
168
|
+
copied_files=3,
|
|
169
|
+
copied_size=50,
|
|
170
|
+
copied_small_files=3,
|
|
171
|
+
copied_small_size=50,
|
|
172
|
+
),
|
|
173
|
+
)
|
|
174
|
+
# The second backup:
|
|
175
|
+
# * /.cache/ -> excluded
|
|
176
|
+
# * min_sized_file1.bin and min_sized_file2.bin -> hardlinked
|
|
177
|
+
assert_fs_tree_overview(
|
|
178
|
+
root=backup_dir,
|
|
179
|
+
expected_overview="""
|
|
180
|
+
file2.txt | file | nlink=1 | 14 Bytes | crc32: 8a11514a
|
|
181
|
+
hardlink2file1 | file | nlink=1 | 14 Bytes | crc32: 8a11514a
|
|
182
|
+
large_file.bin | hardlink | nlink=2 | 67108865 Bytes | crc32: 9671eaac
|
|
183
|
+
min_sized_file1.bin | hardlink | nlink=4 | 1000 Bytes | crc32: f0d93de4
|
|
184
|
+
min_sized_file2.bin | hardlink | nlink=4 | 1000 Bytes | crc32: f0d93de4
|
|
185
|
+
subdir/file.txt | file | nlink=1 | 22 Bytes | crc32: c0167e63
|
|
186
|
+
symlink2file1 | symlink | nlink=2 | 14 Bytes | crc32: 8a11514a
|
|
187
|
+
""",
|
|
188
|
+
)
|
|
@@ -26,7 +26,6 @@ class ReadmeTestCase(BaseTestCase):
|
|
|
26
26
|
def test_main_help(self):
|
|
27
27
|
with NoColorEnvRich():
|
|
28
28
|
stdout = invoke(cli_bin=PACKAGE_ROOT / 'cli.py', args=['--help'], strip_line_prefix='usage: ')
|
|
29
|
-
|
|
30
29
|
self.assert_in_content(
|
|
31
30
|
got=stdout,
|
|
32
31
|
parts=(
|
|
@@ -36,10 +35,6 @@ class ReadmeTestCase(BaseTestCase):
|
|
|
36
35
|
constants.CLI_EPILOG,
|
|
37
36
|
),
|
|
38
37
|
)
|
|
39
|
-
|
|
40
|
-
# Installed via pipx is called 'phlb', not 'cli.py':
|
|
41
|
-
stdout = stdout.replace('./cli.py', 'phlb')
|
|
42
|
-
|
|
43
38
|
assert_cli_help_in_readme(text_block=stdout, marker='main help')
|
|
44
39
|
|
|
45
40
|
def test_backup_help(self):
|
|
@@ -52,10 +47,6 @@ class ReadmeTestCase(BaseTestCase):
|
|
|
52
47
|
'Backup the source directory to the destination',
|
|
53
48
|
),
|
|
54
49
|
)
|
|
55
|
-
|
|
56
|
-
# Installed via pipx is called 'phlb', not 'cli.py':
|
|
57
|
-
stdout = stdout.replace('./cli.py', 'phlb')
|
|
58
|
-
|
|
59
50
|
assert_cli_help_in_readme(text_block=stdout, marker='backup help')
|
|
60
51
|
|
|
61
52
|
def test_dev_help(self):
|
|
@@ -1,4 +1,3 @@
|
|
|
1
|
-
import logging
|
|
2
1
|
from pathlib import Path
|
|
3
2
|
|
|
4
3
|
|
|
@@ -7,8 +6,8 @@ class HashAlreadyExistsError(ValueError):
|
|
|
7
6
|
|
|
8
7
|
|
|
9
8
|
class FileHashDatabase:
|
|
10
|
-
"""
|
|
11
|
-
A simple
|
|
9
|
+
"""
|
|
10
|
+
A simple database to store file content hash <-> relative path mappings.
|
|
12
11
|
Uses a directory structure to avoid too many files in a single directory.
|
|
13
12
|
Path structure:
|
|
14
13
|
{base_dst}/.phlb/hash-lookup/{XX}/{YY}/{hash}
|
|
@@ -40,10 +39,6 @@ class FileHashDatabase:
|
|
|
40
39
|
return None
|
|
41
40
|
else:
|
|
42
41
|
abs_file_path = self.backup_root / rel_file_path
|
|
43
|
-
if not abs_file_path.is_file():
|
|
44
|
-
logging.warning('Hash database entry found, but file does not exist: %s', abs_file_path)
|
|
45
|
-
hash_path.unlink()
|
|
46
|
-
return None
|
|
47
42
|
return abs_file_path
|
|
48
43
|
|
|
49
44
|
def __setitem__(self, hash: str, abs_file_path: Path):
|
|
@@ -2,11 +2,17 @@ from pathlib import Path
|
|
|
2
2
|
|
|
3
3
|
|
|
4
4
|
class FileSizeDatabase:
|
|
5
|
-
"""
|
|
6
|
-
A simple
|
|
7
|
-
|
|
5
|
+
"""
|
|
6
|
+
A simple database to track which file sizes have been seen.
|
|
8
7
|
Uses a directory structure to avoid too many files in a single directory.
|
|
9
|
-
|
|
8
|
+
|
|
9
|
+
Path structure:
|
|
10
|
+
{base_dst}/.phlb/size-lookup/{XX}/{YY}/{size}
|
|
11
|
+
e.g.:
|
|
12
|
+
1234567890 results in: {base_dst}/.phlb/size-lookup/12/34/1234567890
|
|
13
|
+
|
|
14
|
+
Notes:
|
|
15
|
+
* We don't "cache" anything in Memory, to avoid high memory consumption for large datasets.
|
|
10
16
|
"""
|
|
11
17
|
|
|
12
18
|
MIN_SIZE = 1000 # no padding is made, so the min size is 1000 bytes!
|
|
@@ -18,15 +24,6 @@ class FileSizeDatabase:
|
|
|
18
24
|
def _get_size_path(self, size: int) -> Path:
|
|
19
25
|
assert size >= self.MIN_SIZE, f'Size must be at least {self.MIN_SIZE} bytes'
|
|
20
26
|
size_str = str(size)
|
|
21
|
-
|
|
22
|
-
"""DocWrite: README.md ## FileSizeDatabase
|
|
23
|
-
Path structure:
|
|
24
|
-
* `{base_dst}/.phlb/size-lookup/{XX}/{YY}/{size}`
|
|
25
|
-
|
|
26
|
-
e.g.:
|
|
27
|
-
|
|
28
|
-
* `1234567890` results in: `{base_dst}/.phlb/size-lookup/12/34/1234567890`
|
|
29
|
-
"""
|
|
30
27
|
first_dir_name = size_str[:2]
|
|
31
28
|
second_dir_name = size_str[2:4]
|
|
32
29
|
size_path = self.base_path / first_dir_name / second_dir_name / size_str
|
|
@@ -40,7 +37,4 @@ class FileSizeDatabase:
|
|
|
40
37
|
size_path = self._get_size_path(size)
|
|
41
38
|
if not size_path.exists():
|
|
42
39
|
size_path.parent.mkdir(parents=True, exist_ok=True)
|
|
43
|
-
|
|
44
|
-
"""DocWrite: README.md ## FileSizeDatabase
|
|
45
|
-
All files are created empty, as we only care about their existence."""
|
|
46
40
|
size_path.touch(exist_ok=False)
|