PyHardLinkBackup 1.0.0rc0__tar.gz → 1.0.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pyhardlinkbackup-1.0.1/.run/only DocWrite.run.xml +24 -0
- {pyhardlinkbackup-1.0.0rc0 → pyhardlinkbackup-1.0.1}/PKG-INFO +88 -65
- {pyhardlinkbackup-1.0.0rc0 → pyhardlinkbackup-1.0.1}/PyHardLinkBackup/__init__.py +2 -3
- {pyhardlinkbackup-1.0.0rc0 → pyhardlinkbackup-1.0.1}/PyHardLinkBackup/backup.py +46 -8
- {pyhardlinkbackup-1.0.0rc0 → pyhardlinkbackup-1.0.1}/PyHardLinkBackup/cli_app/phlb.py +1 -1
- {pyhardlinkbackup-1.0.0rc0/PyHardLinkBackup/cli_app → pyhardlinkbackup-1.0.1/PyHardLinkBackup/cli_dev}/benchmark.py +23 -24
- pyhardlinkbackup-1.0.1/PyHardLinkBackup/tests/test_backup.py +451 -0
- pyhardlinkbackup-1.0.1/PyHardLinkBackup/tests/test_doc_write.py +25 -0
- {pyhardlinkbackup-1.0.0rc0 → pyhardlinkbackup-1.0.1}/PyHardLinkBackup/tests/test_readme.py +9 -0
- {pyhardlinkbackup-1.0.0rc0 → pyhardlinkbackup-1.0.1}/PyHardLinkBackup/utilities/file_hash_database.py +7 -2
- {pyhardlinkbackup-1.0.0rc0 → pyhardlinkbackup-1.0.1}/PyHardLinkBackup/utilities/file_size_database.py +16 -10
- {pyhardlinkbackup-1.0.0rc0 → pyhardlinkbackup-1.0.1}/PyHardLinkBackup/utilities/filesystem.py +20 -9
- pyhardlinkbackup-1.0.1/PyHardLinkBackup/utilities/tests/test_file_hash_database.py +136 -0
- {pyhardlinkbackup-1.0.0rc0 → pyhardlinkbackup-1.0.1}/PyHardLinkBackup/utilities/tests/test_file_size_database.py +12 -0
- {pyhardlinkbackup-1.0.0rc0 → pyhardlinkbackup-1.0.1}/PyHardLinkBackup/utilities/tests/test_filesystem.py +6 -2
- {pyhardlinkbackup-1.0.0rc0 → pyhardlinkbackup-1.0.1}/README.md +87 -64
- {pyhardlinkbackup-1.0.0rc0 → pyhardlinkbackup-1.0.1}/cli.py +1 -1
- {pyhardlinkbackup-1.0.0rc0 → pyhardlinkbackup-1.0.1}/dev-cli.py +1 -1
- pyhardlinkbackup-1.0.1/docs/README.md +68 -0
- pyhardlinkbackup-1.0.1/docs/about-docs.md +8 -0
- {pyhardlinkbackup-1.0.0rc0 → pyhardlinkbackup-1.0.1}/pyproject.toml +11 -2
- {pyhardlinkbackup-1.0.0rc0 → pyhardlinkbackup-1.0.1}/uv.lock +17 -6
- pyhardlinkbackup-1.0.0rc0/PyHardLinkBackup/tests/test_backup.py +0 -188
- pyhardlinkbackup-1.0.0rc0/PyHardLinkBackup/utilities/tests/test_file_hash_database.py +0 -68
- {pyhardlinkbackup-1.0.0rc0 → pyhardlinkbackup-1.0.1}/.editorconfig +0 -0
- {pyhardlinkbackup-1.0.0rc0 → pyhardlinkbackup-1.0.1}/.github/workflows/tests.yml +0 -0
- {pyhardlinkbackup-1.0.0rc0 → pyhardlinkbackup-1.0.1}/.gitignore +0 -0
- {pyhardlinkbackup-1.0.0rc0 → pyhardlinkbackup-1.0.1}/.idea/.gitignore +0 -0
- {pyhardlinkbackup-1.0.0rc0 → pyhardlinkbackup-1.0.1}/.pre-commit-config.yaml +0 -0
- {pyhardlinkbackup-1.0.0rc0 → pyhardlinkbackup-1.0.1}/.pre-commit-hooks.yaml +0 -0
- {pyhardlinkbackup-1.0.0rc0 → pyhardlinkbackup-1.0.1}/.run/Template Python tests.run.xml +0 -0
- {pyhardlinkbackup-1.0.0rc0 → pyhardlinkbackup-1.0.1}/.run/Unittests - __all__.run.xml +0 -0
- {pyhardlinkbackup-1.0.0rc0 → pyhardlinkbackup-1.0.1}/.run/cli.py --help.run.xml +0 -0
- {pyhardlinkbackup-1.0.0rc0 → pyhardlinkbackup-1.0.1}/.run/dev-cli update.run.xml +0 -0
- {pyhardlinkbackup-1.0.0rc0 → pyhardlinkbackup-1.0.1}/.run/only DocTests.run.xml +0 -0
- {pyhardlinkbackup-1.0.0rc0 → pyhardlinkbackup-1.0.1}/.venv-app/lib/python3.12/site-packages/cli_base/tests/shell_complete_snapshots/.gitignore +0 -0
- {pyhardlinkbackup-1.0.0rc0 → pyhardlinkbackup-1.0.1}/PyHardLinkBackup/__main__.py +0 -0
- {pyhardlinkbackup-1.0.0rc0 → pyhardlinkbackup-1.0.1}/PyHardLinkBackup/cli_app/__init__.py +0 -0
- {pyhardlinkbackup-1.0.0rc0 → pyhardlinkbackup-1.0.1}/PyHardLinkBackup/cli_dev/__init__.py +0 -0
- {pyhardlinkbackup-1.0.0rc0 → pyhardlinkbackup-1.0.1}/PyHardLinkBackup/cli_dev/code_style.py +0 -0
- {pyhardlinkbackup-1.0.0rc0 → pyhardlinkbackup-1.0.1}/PyHardLinkBackup/cli_dev/packaging.py +0 -0
- {pyhardlinkbackup-1.0.0rc0 → pyhardlinkbackup-1.0.1}/PyHardLinkBackup/cli_dev/shell_completion.py +0 -0
- {pyhardlinkbackup-1.0.0rc0 → pyhardlinkbackup-1.0.1}/PyHardLinkBackup/cli_dev/testing.py +0 -0
- {pyhardlinkbackup-1.0.0rc0 → pyhardlinkbackup-1.0.1}/PyHardLinkBackup/cli_dev/update_readme_history.py +0 -0
- {pyhardlinkbackup-1.0.0rc0 → pyhardlinkbackup-1.0.1}/PyHardLinkBackup/constants.py +0 -0
- {pyhardlinkbackup-1.0.0rc0 → pyhardlinkbackup-1.0.1}/PyHardLinkBackup/tests/__init__.py +0 -0
- {pyhardlinkbackup-1.0.0rc0 → pyhardlinkbackup-1.0.1}/PyHardLinkBackup/tests/test_doctests.py +0 -0
- {pyhardlinkbackup-1.0.0rc0 → pyhardlinkbackup-1.0.1}/PyHardLinkBackup/tests/test_project_setup.py +0 -0
- {pyhardlinkbackup-1.0.0rc0 → pyhardlinkbackup-1.0.1}/PyHardLinkBackup/tests/test_readme_history.py +0 -0
- {pyhardlinkbackup-1.0.0rc0 → pyhardlinkbackup-1.0.1}/PyHardLinkBackup/utilities/__init__.py +0 -0
- {pyhardlinkbackup-1.0.0rc0 → pyhardlinkbackup-1.0.1}/PyHardLinkBackup/utilities/humanize.py +0 -0
- {pyhardlinkbackup-1.0.0rc0 → pyhardlinkbackup-1.0.1}/PyHardLinkBackup/utilities/rich_utils.py +0 -0
- {pyhardlinkbackup-1.0.0rc0 → pyhardlinkbackup-1.0.1}/PyHardLinkBackup/utilities/tests/__init__.py +0 -0
- {pyhardlinkbackup-1.0.0rc0 → pyhardlinkbackup-1.0.1}/dist/.gitignore +0 -0
- {pyhardlinkbackup-1.0.0rc0 → pyhardlinkbackup-1.0.1}/noxfile.py +0 -0
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
<component name="ProjectRunConfigurationManager">
|
|
2
|
+
<configuration default="false" name="only DocWrite" type="tests" factoryName="Unittests">
|
|
3
|
+
<module name="PyHardLinkBackup" />
|
|
4
|
+
<option name="ENV_FILES" value="" />
|
|
5
|
+
<option name="INTERPRETER_OPTIONS" value="" />
|
|
6
|
+
<option name="PARENT_ENVS" value="true" />
|
|
7
|
+
<envs>
|
|
8
|
+
<env name="PYTHONUNBUFFERED" value="1" />
|
|
9
|
+
<env name="PYTHONWARNINGS" value="always" />
|
|
10
|
+
</envs>
|
|
11
|
+
<option name="SDK_HOME" value="" />
|
|
12
|
+
<option name="WORKING_DIRECTORY" value="$PROJECT_DIR$" />
|
|
13
|
+
<option name="IS_MODULE_SDK" value="true" />
|
|
14
|
+
<option name="ADD_CONTENT_ROOTS" value="false" />
|
|
15
|
+
<option name="ADD_SOURCE_ROOTS" value="false" />
|
|
16
|
+
<EXTENSION ID="PythonCoverageRunConfigurationExtension" runner="coverage.py" />
|
|
17
|
+
<option name="RUN_TOOL" value="" />
|
|
18
|
+
<option name="_new_pattern" value="&quot;&quot;" />
|
|
19
|
+
<option name="_new_additionalArguments" value="&quot;&quot;" />
|
|
20
|
+
<option name="_new_target" value="&quot;PyHardLinkBackup.tests.test_doc_write&quot;" />
|
|
21
|
+
<option name="_new_targetType" value="&quot;PYTHON&quot;" />
|
|
22
|
+
<method v="2" />
|
|
23
|
+
</configuration>
|
|
24
|
+
</component>
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: PyHardLinkBackup
|
|
3
|
-
Version: 1.0.0rc0
|
|
3
|
+
Version: 1.0.1
|
|
4
4
|
Summary: HardLink/Deduplication Backups with Python
|
|
5
5
|
Project-URL: Documentation, https://github.com/jedie/PyHardLinkBackup
|
|
6
6
|
Project-URL: Source, https://github.com/jedie/PyHardLinkBackup
|
|
@@ -23,7 +23,71 @@ Description-Content-Type: text/markdown
|
|
|
23
23
|
|
|
24
24
|
HardLink/Deduplication Backups with Python
|
|
25
25
|
|
|
26
|
-
**WIP:** v1.0.0 is a complete rewrite of PyHardLinkBackup.
|
|
26
|
+
**WIP:** v1.0.0 is a complete rewrite of PyHardLinkBackup.
|
|
27
|
+
|
|
28
|
+
## installation
|
|
29
|
+
|
|
30
|
+
You can use [pipx](https://pipx.pypa.io/stable/installation/) to install and use PyHardLinkBackup, e.g.:
|
|
31
|
+
|
|
32
|
+
```bash
|
|
33
|
+
sudo apt install pipx
|
|
34
|
+
|
|
35
|
+
pipx install PyHardLinkBackup
|
|
36
|
+
```
|
|
37
|
+
|
|
38
|
+
After this you can call the CLI via `phlb` command.
|
|
39
|
+
The main command is `phlb backup <source> <destination>`:
|
|
40
|
+
|
|
41
|
+
[comment]: <> (✂✂✂ auto generated backup help start ✂✂✂)
|
|
42
|
+
```
|
|
43
|
+
usage: phlb backup [-h] source destination [--excludes [STR [STR ...]]] [-v]
|
|
44
|
+
|
|
45
|
+
Backup the source directory to the destination directory using hard links for deduplication.
|
|
46
|
+
|
|
47
|
+
╭─ positional arguments ───────────────────────────────────────────────────────────────────────────────────────────────╮
|
|
48
|
+
│ source Source directory to back up. (required) │
|
|
49
|
+
│ destination Destination directory for the backup. (required) │
|
|
50
|
+
╰──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯
|
|
51
|
+
╭─ options ────────────────────────────────────────────────────────────────────────────────────────────────────────────╮
|
|
52
|
+
│ -h, --help show this help message and exit │
|
|
53
|
+
│ --excludes [STR [STR ...]] │
|
|
54
|
+
│ List of directory or file names to exclude from backup. (default: __pycache__ .cache .temp .tmp .tox │
|
|
55
|
+
│ .nox) │
|
|
56
|
+
│ -v, --verbosity │
|
|
57
|
+
│ Verbosity level; e.g.: -v, -vv, -vvv, etc. (repeatable) │
|
|
58
|
+
╰──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯
|
|
59
|
+
```
|
|
60
|
+
[comment]: <> (✂✂✂ auto generated backup help end ✂✂✂)
|
|
61
|
+
|
|
62
|
+
|
|
63
|
+
|
|
64
|
+
complete help for main CLI app:
|
|
65
|
+
|
|
66
|
+
[comment]: <> (✂✂✂ auto generated main help start ✂✂✂)
|
|
67
|
+
```
|
|
68
|
+
usage: phlb [-h] {backup,version}
|
|
69
|
+
|
|
70
|
+
|
|
71
|
+
|
|
72
|
+
╭─ options ─────────────────────────────────────────────────────────────────────────────────────────────────╮
|
|
73
|
+
│ -h, --help show this help message and exit │
|
|
74
|
+
╰───────────────────────────────────────────────────────────────────────────────────────────────────────────╯
|
|
75
|
+
╭─ subcommands ─────────────────────────────────────────────────────────────────────────────────────────────╮
|
|
76
|
+
│ (required) │
|
|
77
|
+
│ • backup Backup the source directory to the destination directory using hard links for deduplication. │
|
|
78
|
+
│ • version Print version and exit │
|
|
79
|
+
╰───────────────────────────────────────────────────────────────────────────────────────────────────────────╯
|
|
80
|
+
```
|
|
81
|
+
[comment]: <> (✂✂✂ auto generated main help end ✂✂✂)
|
|
82
|
+
|
|
83
|
+
|
|
84
|
+
### update
|
|
85
|
+
|
|
86
|
+
If you use pipx, just call:
|
|
87
|
+
```bash
|
|
88
|
+
pipx upgrade PyHardLinkBackup
|
|
89
|
+
```
|
|
90
|
+
see: https://pipx.pypa.io/stable/docs/#pipx-upgrade
|
|
27
91
|
|
|
28
92
|
|
|
29
93
|
## concept
|
|
@@ -83,70 +147,18 @@ e.g.: hash like `abcdef123...` stored in: `{destination}/.phlb/hash-lookup/ab/cd
|
|
|
83
147
|
The file contains only the relative path to the first hardlink of this file content.
|
|
84
148
|
|
|
85
149
|
|
|
86
|
-
##
|
|
87
|
-
|
|
88
|
-
The main command is `backup`:
|
|
89
|
-
|
|
90
|
-
[comment]: <> (✂✂✂ auto generated backup help start ✂✂✂)
|
|
91
|
-
```
|
|
92
|
-
usage: ./cli.py backup [-h] source destination [--excludes STR|{[STR [STR ...]]}] [-v]
|
|
93
|
-
|
|
94
|
-
Backup the source directory to the destination directory using hard links for deduplication.
|
|
95
|
-
|
|
96
|
-
╭─ positional arguments ───────────────────────────────────────────────────────────────────────────────────────────────╮
|
|
97
|
-
│ source Source directory to back up. (required) │
|
|
98
|
-
│ destination Destination directory for the backup. (required) │
|
|
99
|
-
╰──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯
|
|
100
|
-
╭─ options ────────────────────────────────────────────────────────────────────────────────────────────────────────────╮
|
|
101
|
-
│ -h, --help show this help message and exit │
|
|
102
|
-
│ --excludes STR|{[STR [STR ...]]} │
|
|
103
|
-
│ List of directory or file names to exclude from backup. (default: __pycache__ .cache .temp .tmp .tox │
|
|
104
|
-
│ .nox) │
|
|
105
|
-
│ -v, --verbosity │
|
|
106
|
-
│ Verbosity level; e.g.: -v, -vv, -vvv, etc. (repeatable) │
|
|
107
|
-
╰──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯
|
|
108
|
-
```
|
|
109
|
-
[comment]: <> (✂✂✂ auto generated backup help end ✂✂✂)
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
## CLI - main app help
|
|
113
|
-
|
|
114
|
-
[comment]: <> (✂✂✂ auto generated main help start ✂✂✂)
|
|
115
|
-
```
|
|
116
|
-
usage: ./cli.py [-h] {backup,benchmark-hashes,version}
|
|
117
|
-
|
|
150
|
+
## start development
|
|
118
151
|
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
│ (required) │
|
|
125
|
-
│ • backup Backup the source directory to the destination directory using hard links for deduplication. │
|
|
126
|
-
│ • benchmark-hashes Benchmark different file hashing algorithms on the given path Example output: │
|
|
127
|
-
│ │
|
|
128
|
-
│ Total files hashed: 220, total size: 1187.7 MiB │
|
|
129
|
-
│ │
|
|
130
|
-
│ Results: Total file content read time: 1.7817s │
|
|
131
|
-
│ │
|
|
132
|
-
│ sha1 | Total: 0.6827s | 0.4x hash/read sha256 | Total: 0.7189s | 0.4x hash/read │
|
|
133
|
-
│ sha224 | Total: 0.7375s | 0.4x hash/read sha384 | Total: 1.6552s | 0.9x hash/read │
|
|
134
|
-
│ blake2b | Total: 1.6708s | 0.9x hash/read md5 | Total: 1.6870s | 0.9x hash/read │
|
|
135
|
-
│ sha512 | Total: 1.7269s | 1.0x hash/read shake_128 | Total: 1.9834s | 1.1x hash/read │
|
|
136
|
-
│ sha3_224 | Total: 2.3006s | 1.3x hash/read sha3_256 | Total: 2.3856s | 1.3x hash/read │
|
|
137
|
-
│ shake_256 | Total: 2.4375s | 1.4x hash/read blake2s | Total: 2.5219s | 1.4x hash/read │
|
|
138
|
-
│ sha3_384 | Total: 3.2596s | 1.8x hash/read sha3_512 | Total: 4.5328s | 2.5x hash/read │
|
|
139
|
-
│ • version Print version and exit │
|
|
140
|
-
╰──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯
|
|
152
|
+
```bash
|
|
153
|
+
~$ git clone https://github.com/jedie/PyHardLinkBackup.git
|
|
154
|
+
~$ cd PyHardLinkBackup
|
|
155
|
+
~/PyHardLinkBackup$ ./cli.py --help
|
|
156
|
+
~/PyHardLinkBackup$ ./dev-cli.py --help
|
|
141
157
|
```
|
|
142
|
-
[comment]: <> (✂✂✂ auto generated main help end ✂✂✂)
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
## dev CLI
|
|
146
158
|
|
|
147
159
|
[comment]: <> (✂✂✂ auto generated dev help start ✂✂✂)
|
|
148
160
|
```
|
|
149
|
-
usage: ./dev-cli.py [-h] {coverage,install,lint,mypy,nox,pip-audit,publish,shell-completion,test,update,update-readme-history,update-test-snapshot-files,version}
|
|
161
|
+
usage: ./dev-cli.py [-h] {benchmark-hashes,coverage,install,lint,mypy,nox,pip-audit,publish,shell-completion,test,update,update-readme-history,update-test-snapshot-files,version}
|
|
150
162
|
|
|
151
163
|
|
|
152
164
|
|
|
@@ -155,6 +167,8 @@ usage: ./dev-cli.py [-h] {coverage,install,lint,mypy,nox,pip-audit,publish,shell
|
|
|
155
167
|
╰──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯
|
|
156
168
|
╭─ subcommands ────────────────────────────────────────────────────────────────────────────────────────────────────────╮
|
|
157
169
|
│ (required) │
|
|
170
|
+
│ • benchmark-hashes │
|
|
171
|
+
│ Benchmark different file hashing algorithms on the given path. │
|
|
158
172
|
│ • coverage Run tests and show coverage report. │
|
|
159
173
|
│ • install Install requirements and 'PyHardLinkBackup' via pip as editable. │
|
|
160
174
|
│ • lint Check/fix code style by run: "ruff check --fix" │
|
|
@@ -190,7 +204,16 @@ v1 is a complete rewrite of PyHardLinkBackup.
|
|
|
190
204
|
|
|
191
205
|
[comment]: <> (✂✂✂ auto generated history start ✂✂✂)
|
|
192
206
|
|
|
193
|
-
* [v1.0.
|
|
207
|
+
* [v1.0.1](https://github.com/jedie/PyHardLinkBackup/compare/v1.0.0...v1.0.1)
|
|
208
|
+
* 2026-01-13 - Store SHA256SUMS files in backup directories
|
|
209
|
+
* [v1.0.0](https://github.com/jedie/PyHardLinkBackup/compare/v0.13.0...v1.0.0)
|
|
210
|
+
* 2026-01-13 - Change "./cli.py" to "phlb" (because it's the name installed via pipx)
|
|
211
|
+
* 2026-01-13 - Update README
|
|
212
|
+
* 2026-01-13 - Fix benchmark moved to dev CLI ;)
|
|
213
|
+
* 2026-01-13 - Remove tyro warning
|
|
214
|
+
* 2026-01-13 - Move "benchmark_hashes" from app to dev cli (It's more for testing)
|
|
215
|
+
* 2026-01-13 - Rename [project.scripts] hooks
|
|
216
|
+
* 2026-01-13 - Add DocWrite, handle broken symlinks, keep file meta, handle missing hardlink sources
|
|
194
217
|
* 2026-01-12 - First working iteration with rich progess bar
|
|
195
218
|
* 2026-01-08 - Rewrite everything
|
|
196
219
|
* [v0.13.0](https://github.com/jedie/PyHardLinkBackup/compare/v0.12.3...v0.13.0)
|
|
@@ -220,6 +243,9 @@ v1 is a complete rewrite of PyHardLinkBackup.
|
|
|
220
243
|
* 2020-03-16 - just warn if used directly (needfull for devlopment to call this directly ;)
|
|
221
244
|
* 2020-03-16 - update requirements
|
|
222
245
|
* 2020-03-16 - +pytest-randomly
|
|
246
|
+
|
|
247
|
+
<details><summary>Expand older history entries ...</summary>
|
|
248
|
+
|
|
223
249
|
* [v0.12.2](https://github.com/jedie/PyHardLinkBackup/compare/v0.12.1...v0.12.2)
|
|
224
250
|
* 2020-03-06 - repare v0.12.2 release
|
|
225
251
|
* 2020-03-06 - enhance log file content
|
|
@@ -228,9 +254,6 @@ v1 is a complete rewrite of PyHardLinkBackup.
|
|
|
228
254
|
* 2020-03-05 - Fix #40 by decrease log level
|
|
229
255
|
* 2020-03-05 - Update boot_pyhardlinkbackup.cmd
|
|
230
256
|
* 2020-03-05 - Update boot_pyhardlinkbackup.sh
|
|
231
|
-
|
|
232
|
-
<details><summary>Expand older history entries ...</summary>
|
|
233
|
-
|
|
234
257
|
* [v0.12.1](https://github.com/jedie/PyHardLinkBackup/compare/v0.12.0...v0.12.1)
|
|
235
258
|
* 2020-03-05 - update tests and set version to 0.12.1
|
|
236
259
|
* 2020-03-05 - less verbose pytest output
|
|
@@ -1,8 +1,7 @@
|
|
|
1
|
-
"""
|
|
2
|
-
PyHardLinkBackup
|
|
1
|
+
"""DocWrite: README.md # PyHardLinkBackup
|
|
3
2
|
HardLink/Deduplication Backups with Python
|
|
4
3
|
"""
|
|
5
4
|
|
|
6
5
|
# See https://packaging.python.org/en/latest/specifications/version-specifiers/
|
|
7
|
-
__version__ = '1.0.0rc0'
|
|
6
|
+
__version__ = '1.0.1'
|
|
8
7
|
__author__ = 'Jens Diemer <PyHardLinkBackup@jensdiemer.de>'
|
|
@@ -40,6 +40,22 @@ class BackupResult:
|
|
|
40
40
|
copied_small_size: int
|
|
41
41
|
|
|
42
42
|
|
|
43
|
+
def store_hash(file_path: Path, file_hash: str):
|
|
44
|
+
"""DocWrite: README.md ## SHA256SUMS
|
|
45
|
+
A `SHA256SUMS` file is stored in each backup directory containing the SHA256 hashes of all files in that directory.
|
|
46
|
+
It's the same format as e.g.: `sha256sum * > SHA256SUMS` command produces.
|
|
47
|
+
So it's possible to verify the integrity of the backup files later.
|
|
48
|
+
e.g.:
|
|
49
|
+
```bash
|
|
50
|
+
cd .../your/backup/foobar/20240101_120000/
|
|
51
|
+
sha256sum -c SHA256SUMS
|
|
52
|
+
```
|
|
53
|
+
"""
|
|
54
|
+
hash_file_path = file_path.parent / 'SHA256SUMS'
|
|
55
|
+
with hash_file_path.open('a') as f:
|
|
56
|
+
f.write(f'{file_hash} {file_path.name}\n')
|
|
57
|
+
|
|
58
|
+
|
|
43
59
|
def backup_tree(*, src_root: Path, backup_root: Path, excludes: set[str]) -> BackupResult:
|
|
44
60
|
src_root = src_root.resolve()
|
|
45
61
|
if not src_root.is_dir():
|
|
@@ -84,22 +100,37 @@ def backup_tree(*, src_root: Path, backup_root: Path, excludes: set[str]) -> Bac
|
|
|
84
100
|
next_update = 0
|
|
85
101
|
for entry in iter_scandir_files(src_root, excludes=excludes):
|
|
86
102
|
backup_count += 1
|
|
87
|
-
|
|
103
|
+
src_path = Path(entry.path)
|
|
104
|
+
|
|
105
|
+
dst_path = backup_dir / src_path.relative_to(src_root)
|
|
106
|
+
dst_dir_path = dst_path.parent
|
|
107
|
+
if not dst_dir_path.exists():
|
|
108
|
+
dst_dir_path.mkdir(parents=True, exist_ok=False)
|
|
109
|
+
|
|
110
|
+
try:
|
|
111
|
+
size = entry.stat().st_size
|
|
112
|
+
except FileNotFoundError:
|
|
113
|
+
# e.g.: Handle broken symlink
|
|
114
|
+
target = os.readlink(src_path)
|
|
115
|
+
dst_path.symlink_to(target)
|
|
116
|
+
symlink_files += 1
|
|
117
|
+
continue
|
|
118
|
+
|
|
88
119
|
backup_size += size
|
|
89
120
|
|
|
121
|
+
if entry.name == 'SHA256SUMS':
|
|
122
|
+
# Skip existing SHA256SUMS files in source tree,
|
|
123
|
+
# because we create our own SHA256SUMS files.
|
|
124
|
+
logger.debug('Skip existing SHA256SUMS file: %s', src_path)
|
|
125
|
+
continue
|
|
126
|
+
|
|
90
127
|
now = time.monotonic()
|
|
91
128
|
if now >= next_update:
|
|
92
129
|
progress.update(backup_count=backup_count, backup_size=backup_size)
|
|
93
130
|
next_update = now + 0.5
|
|
94
131
|
|
|
95
|
-
src_path = Path(entry.path)
|
|
96
|
-
dst_path = backup_dir / src_path.relative_to(src_root)
|
|
97
|
-
|
|
98
|
-
dst_path.parent.mkdir(parents=True, exist_ok=True)
|
|
99
|
-
|
|
100
132
|
if entry.is_symlink():
|
|
101
133
|
logger.debug('Copy symlink: %s to %s', src_path, dst_path)
|
|
102
|
-
# Copy symlinks as-is
|
|
103
134
|
target = os.readlink(src_path)
|
|
104
135
|
dst_path.symlink_to(target)
|
|
105
136
|
symlink_files += 1
|
|
@@ -114,10 +145,12 @@ def backup_tree(*, src_root: Path, backup_root: Path, excludes: set[str]) -> Bac
|
|
|
114
145
|
# Small file -> always copy without deduplication
|
|
115
146
|
logger.info('Copy small file: %s to %s', src_path, dst_path)
|
|
116
147
|
shutil.copy2(src_path, dst_path)
|
|
148
|
+
file_hash = copy_and_hash(src_path, dst_path)
|
|
117
149
|
copied_files += 1
|
|
118
150
|
copied_size += size
|
|
119
151
|
copied_small_files += 1
|
|
120
152
|
copied_small_size += size
|
|
153
|
+
store_hash(dst_path, file_hash)
|
|
121
154
|
continue
|
|
122
155
|
|
|
123
156
|
if size in size_db:
|
|
@@ -152,10 +185,12 @@ def backup_tree(*, src_root: Path, backup_root: Path, excludes: set[str]) -> Bac
|
|
|
152
185
|
hardlinked_size += size
|
|
153
186
|
else:
|
|
154
187
|
logger.info('Copy unique file: %s to %s', src_path, dst_path)
|
|
155
|
-
shutil.copy2(src_path, dst_path)
|
|
156
188
|
hash_db[file_hash] = dst_path
|
|
157
189
|
copied_files += 1
|
|
158
190
|
copied_size += size
|
|
191
|
+
|
|
192
|
+
# Keep original file metadata (permission bits, time stamps, and flags)
|
|
193
|
+
shutil.copy2(src_path, dst_path)
|
|
159
194
|
else:
|
|
160
195
|
# A file with this size not backuped before -> Can't be duplicate -> copy and hash
|
|
161
196
|
file_hash = copy_and_hash(src_path, dst_path)
|
|
@@ -164,6 +199,9 @@ def backup_tree(*, src_root: Path, backup_root: Path, excludes: set[str]) -> Bac
|
|
|
164
199
|
copied_files += 1
|
|
165
200
|
copied_size += size
|
|
166
201
|
|
|
202
|
+
store_hash(dst_path, file_hash)
|
|
203
|
+
|
|
204
|
+
# Finalize progress indicator values:
|
|
167
205
|
progress.update(backup_count=backup_count, backup_size=backup_size)
|
|
168
206
|
|
|
169
207
|
print(f'\nBackup complete: {backup_dir} (total size {human_filesize(backup_size)})\n')
|
|
@@ -9,7 +9,7 @@ from cli_base.cli_tools.verbosity import setup_logging
|
|
|
9
9
|
from cli_base.tyro_commands import TyroVerbosityArgType
|
|
10
10
|
from rich import print # noqa
|
|
11
11
|
|
|
12
|
-
from PyHardLinkBackup.cli_app import app
|
|
12
|
+
from PyHardLinkBackup.cli_dev import app
|
|
13
13
|
from PyHardLinkBackup.utilities.filesystem import iter_scandir_files
|
|
14
14
|
|
|
15
15
|
|
|
@@ -26,30 +26,29 @@ def benchmark_hashes(
|
|
|
26
26
|
verbosity: TyroVerbosityArgType = 1,
|
|
27
27
|
) -> None:
|
|
28
28
|
"""
|
|
29
|
-
Benchmark different file hashing algorithms on the given path
|
|
30
|
-
|
|
31
|
-
Example output:
|
|
32
|
-
|
|
33
|
-
Total files hashed: 220, total size: 1187.7 MiB
|
|
34
|
-
|
|
35
|
-
Results:
|
|
36
|
-
Total file content read time: 1.7817s
|
|
37
|
-
|
|
38
|
-
sha1 | Total: 0.6827s | 0.4x hash/read
|
|
39
|
-
sha256 | Total: 0.7189s | 0.4x hash/read
|
|
40
|
-
sha224 | Total: 0.7375s | 0.4x hash/read
|
|
41
|
-
sha384 | Total: 1.6552s | 0.9x hash/read
|
|
42
|
-
blake2b | Total: 1.6708s | 0.9x hash/read
|
|
43
|
-
md5 | Total: 1.6870s | 0.9x hash/read
|
|
44
|
-
sha512 | Total: 1.7269s | 1.0x hash/read
|
|
45
|
-
shake_128 | Total: 1.9834s | 1.1x hash/read
|
|
46
|
-
sha3_224 | Total: 2.3006s | 1.3x hash/read
|
|
47
|
-
sha3_256 | Total: 2.3856s | 1.3x hash/read
|
|
48
|
-
shake_256 | Total: 2.4375s | 1.4x hash/read
|
|
49
|
-
blake2s | Total: 2.5219s | 1.4x hash/read
|
|
50
|
-
sha3_384 | Total: 3.2596s | 1.8x hash/read
|
|
51
|
-
sha3_512 | Total: 4.5328s | 2.5x hash/read
|
|
29
|
+
Benchmark different file hashing algorithms on the given path.
|
|
52
30
|
"""
|
|
31
|
+
# Example output:
|
|
32
|
+
#
|
|
33
|
+
# Total files hashed: 220, total size: 1187.7 MiB
|
|
34
|
+
#
|
|
35
|
+
# Results:
|
|
36
|
+
# Total file content read time: 1.7817s
|
|
37
|
+
#
|
|
38
|
+
# sha1 | Total: 0.6827s | 0.4x hash/read
|
|
39
|
+
# sha256 | Total: 0.7189s | 0.4x hash/read
|
|
40
|
+
# sha224 | Total: 0.7375s | 0.4x hash/read
|
|
41
|
+
# sha384 | Total: 1.6552s | 0.9x hash/read
|
|
42
|
+
# blake2b | Total: 1.6708s | 0.9x hash/read
|
|
43
|
+
# md5 | Total: 1.6870s | 0.9x hash/read
|
|
44
|
+
# sha512 | Total: 1.7269s | 1.0x hash/read
|
|
45
|
+
# shake_128 | Total: 1.9834s | 1.1x hash/read
|
|
46
|
+
# sha3_224 | Total: 2.3006s | 1.3x hash/read
|
|
47
|
+
# sha3_256 | Total: 2.3856s | 1.3x hash/read
|
|
48
|
+
# shake_256 | Total: 2.4375s | 1.4x hash/read
|
|
49
|
+
# blake2s | Total: 2.5219s | 1.4x hash/read
|
|
50
|
+
# sha3_384 | Total: 3.2596s | 1.8x hash/read
|
|
51
|
+
# sha3_512 | Total: 4.5328s | 2.5x hash/read
|
|
53
52
|
setup_logging(verbosity=verbosity)
|
|
54
53
|
assert_is_dir(base_path)
|
|
55
54
|
print(f'Benchmarking file hashes under: {base_path}')
|