PyHardLinkBackup 1.0.0__tar.gz → 1.0.0rc0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (55) hide show
  1. {pyhardlinkbackup-1.0.0 → pyhardlinkbackup-1.0.0rc0}/PKG-INFO +62 -83
  2. {pyhardlinkbackup-1.0.0 → pyhardlinkbackup-1.0.0rc0}/PyHardLinkBackup/__init__.py +3 -2
  3. {pyhardlinkbackup-1.0.0 → pyhardlinkbackup-1.0.0rc0}/PyHardLinkBackup/backup.py +8 -20
  4. {pyhardlinkbackup-1.0.0/PyHardLinkBackup/cli_dev → pyhardlinkbackup-1.0.0rc0/PyHardLinkBackup/cli_app}/benchmark.py +24 -23
  5. {pyhardlinkbackup-1.0.0 → pyhardlinkbackup-1.0.0rc0}/PyHardLinkBackup/cli_app/phlb.py +1 -1
  6. pyhardlinkbackup-1.0.0rc0/PyHardLinkBackup/tests/test_backup.py +188 -0
  7. {pyhardlinkbackup-1.0.0 → pyhardlinkbackup-1.0.0rc0}/PyHardLinkBackup/tests/test_readme.py +0 -9
  8. {pyhardlinkbackup-1.0.0 → pyhardlinkbackup-1.0.0rc0}/PyHardLinkBackup/utilities/file_hash_database.py +2 -7
  9. {pyhardlinkbackup-1.0.0 → pyhardlinkbackup-1.0.0rc0}/PyHardLinkBackup/utilities/file_size_database.py +10 -16
  10. {pyhardlinkbackup-1.0.0 → pyhardlinkbackup-1.0.0rc0}/PyHardLinkBackup/utilities/filesystem.py +9 -20
  11. pyhardlinkbackup-1.0.0rc0/PyHardLinkBackup/utilities/tests/test_file_hash_database.py +68 -0
  12. {pyhardlinkbackup-1.0.0 → pyhardlinkbackup-1.0.0rc0}/PyHardLinkBackup/utilities/tests/test_file_size_database.py +0 -12
  13. {pyhardlinkbackup-1.0.0 → pyhardlinkbackup-1.0.0rc0}/PyHardLinkBackup/utilities/tests/test_filesystem.py +2 -6
  14. {pyhardlinkbackup-1.0.0 → pyhardlinkbackup-1.0.0rc0}/README.md +61 -82
  15. {pyhardlinkbackup-1.0.0 → pyhardlinkbackup-1.0.0rc0}/cli.py +1 -1
  16. {pyhardlinkbackup-1.0.0 → pyhardlinkbackup-1.0.0rc0}/dev-cli.py +1 -1
  17. {pyhardlinkbackup-1.0.0 → pyhardlinkbackup-1.0.0rc0}/pyproject.toml +2 -11
  18. {pyhardlinkbackup-1.0.0 → pyhardlinkbackup-1.0.0rc0}/uv.lock +6 -17
  19. pyhardlinkbackup-1.0.0/.run/only DocWrite.run.xml +0 -24
  20. pyhardlinkbackup-1.0.0/PyHardLinkBackup/tests/test_backup.py +0 -399
  21. pyhardlinkbackup-1.0.0/PyHardLinkBackup/tests/test_doc_write.py +0 -25
  22. pyhardlinkbackup-1.0.0/PyHardLinkBackup/utilities/tests/test_file_hash_database.py +0 -134
  23. pyhardlinkbackup-1.0.0/docs/README.md +0 -57
  24. pyhardlinkbackup-1.0.0/docs/about-docs.md +0 -8
  25. {pyhardlinkbackup-1.0.0 → pyhardlinkbackup-1.0.0rc0}/.editorconfig +0 -0
  26. {pyhardlinkbackup-1.0.0 → pyhardlinkbackup-1.0.0rc0}/.github/workflows/tests.yml +0 -0
  27. {pyhardlinkbackup-1.0.0 → pyhardlinkbackup-1.0.0rc0}/.gitignore +0 -0
  28. {pyhardlinkbackup-1.0.0 → pyhardlinkbackup-1.0.0rc0}/.idea/.gitignore +0 -0
  29. {pyhardlinkbackup-1.0.0 → pyhardlinkbackup-1.0.0rc0}/.pre-commit-config.yaml +0 -0
  30. {pyhardlinkbackup-1.0.0 → pyhardlinkbackup-1.0.0rc0}/.pre-commit-hooks.yaml +0 -0
  31. {pyhardlinkbackup-1.0.0 → pyhardlinkbackup-1.0.0rc0}/.run/Template Python tests.run.xml +0 -0
  32. {pyhardlinkbackup-1.0.0 → pyhardlinkbackup-1.0.0rc0}/.run/Unittests - __all__.run.xml +0 -0
  33. {pyhardlinkbackup-1.0.0 → pyhardlinkbackup-1.0.0rc0}/.run/cli.py --help.run.xml +0 -0
  34. {pyhardlinkbackup-1.0.0 → pyhardlinkbackup-1.0.0rc0}/.run/dev-cli update.run.xml +0 -0
  35. {pyhardlinkbackup-1.0.0 → pyhardlinkbackup-1.0.0rc0}/.run/only DocTests.run.xml +0 -0
  36. {pyhardlinkbackup-1.0.0 → pyhardlinkbackup-1.0.0rc0}/.venv-app/lib/python3.12/site-packages/cli_base/tests/shell_complete_snapshots/.gitignore +0 -0
  37. {pyhardlinkbackup-1.0.0 → pyhardlinkbackup-1.0.0rc0}/PyHardLinkBackup/__main__.py +0 -0
  38. {pyhardlinkbackup-1.0.0 → pyhardlinkbackup-1.0.0rc0}/PyHardLinkBackup/cli_app/__init__.py +0 -0
  39. {pyhardlinkbackup-1.0.0 → pyhardlinkbackup-1.0.0rc0}/PyHardLinkBackup/cli_dev/__init__.py +0 -0
  40. {pyhardlinkbackup-1.0.0 → pyhardlinkbackup-1.0.0rc0}/PyHardLinkBackup/cli_dev/code_style.py +0 -0
  41. {pyhardlinkbackup-1.0.0 → pyhardlinkbackup-1.0.0rc0}/PyHardLinkBackup/cli_dev/packaging.py +0 -0
  42. {pyhardlinkbackup-1.0.0 → pyhardlinkbackup-1.0.0rc0}/PyHardLinkBackup/cli_dev/shell_completion.py +0 -0
  43. {pyhardlinkbackup-1.0.0 → pyhardlinkbackup-1.0.0rc0}/PyHardLinkBackup/cli_dev/testing.py +0 -0
  44. {pyhardlinkbackup-1.0.0 → pyhardlinkbackup-1.0.0rc0}/PyHardLinkBackup/cli_dev/update_readme_history.py +0 -0
  45. {pyhardlinkbackup-1.0.0 → pyhardlinkbackup-1.0.0rc0}/PyHardLinkBackup/constants.py +0 -0
  46. {pyhardlinkbackup-1.0.0 → pyhardlinkbackup-1.0.0rc0}/PyHardLinkBackup/tests/__init__.py +0 -0
  47. {pyhardlinkbackup-1.0.0 → pyhardlinkbackup-1.0.0rc0}/PyHardLinkBackup/tests/test_doctests.py +0 -0
  48. {pyhardlinkbackup-1.0.0 → pyhardlinkbackup-1.0.0rc0}/PyHardLinkBackup/tests/test_project_setup.py +0 -0
  49. {pyhardlinkbackup-1.0.0 → pyhardlinkbackup-1.0.0rc0}/PyHardLinkBackup/tests/test_readme_history.py +0 -0
  50. {pyhardlinkbackup-1.0.0 → pyhardlinkbackup-1.0.0rc0}/PyHardLinkBackup/utilities/__init__.py +0 -0
  51. {pyhardlinkbackup-1.0.0 → pyhardlinkbackup-1.0.0rc0}/PyHardLinkBackup/utilities/humanize.py +0 -0
  52. {pyhardlinkbackup-1.0.0 → pyhardlinkbackup-1.0.0rc0}/PyHardLinkBackup/utilities/rich_utils.py +0 -0
  53. {pyhardlinkbackup-1.0.0 → pyhardlinkbackup-1.0.0rc0}/PyHardLinkBackup/utilities/tests/__init__.py +0 -0
  54. {pyhardlinkbackup-1.0.0 → pyhardlinkbackup-1.0.0rc0}/dist/.gitignore +0 -0
  55. {pyhardlinkbackup-1.0.0 → pyhardlinkbackup-1.0.0rc0}/noxfile.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: PyHardLinkBackup
3
- Version: 1.0.0
3
+ Version: 1.0.0rc0
4
4
  Summary: HardLink/Deduplication Backups with Python
5
5
  Project-URL: Documentation, https://github.com/jedie/PyHardLinkBackup
6
6
  Project-URL: Source, https://github.com/jedie/PyHardLinkBackup
@@ -23,71 +23,7 @@ Description-Content-Type: text/markdown
23
23
 
24
24
  HardLink/Deduplication Backups with Python
25
25
 
26
- **WIP:** v1.0.0 is a complete rewrite of PyHardLinkBackup.
27
-
28
- ## installation
29
-
30
- You can use [pipx](https://pipx.pypa.io/stable/installation/) to install and use PyHardLinkBackup, e.g.:
31
-
32
- ```bash
33
- sudo apt install pipx
34
-
35
- pipx install PyHardLinkBackup
36
- ```
37
-
38
- After this you can call the CLI via `phlb` command.
39
- The main command is `phlb backup <source> <destination>`:
40
-
41
- [comment]: <> (✂✂✂ auto generated backup help start ✂✂✂)
42
- ```
43
- usage: phlb backup [-h] source destination [--excludes [STR [STR ...]]] [-v]
44
-
45
- Backup the source directory to the destination directory using hard links for deduplication.
46
-
47
- ╭─ positional arguments ───────────────────────────────────────────────────────────────────────────────────────────────╮
48
- │ source Source directory to back up. (required) │
49
- │ destination Destination directory for the backup. (required) │
50
- ╰──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯
51
- ╭─ options ────────────────────────────────────────────────────────────────────────────────────────────────────────────╮
52
- │ -h, --help show this help message and exit │
53
- │ --excludes [STR [STR ...]] │
54
- │ List of directory or file names to exclude from backup. (default: __pycache__ .cache .temp .tmp .tox │
55
- │ .nox) │
56
- │ -v, --verbosity │
57
- │ Verbosity level; e.g.: -v, -vv, -vvv, etc. (repeatable) │
58
- ╰──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯
59
- ```
60
- [comment]: <> (✂✂✂ auto generated backup help end ✂✂✂)
61
-
62
-
63
-
64
- complete help for main CLI app:
65
-
66
- [comment]: <> (✂✂✂ auto generated main help start ✂✂✂)
67
- ```
68
- usage: phlb [-h] {backup,version}
69
-
70
-
71
-
72
- ╭─ options ─────────────────────────────────────────────────────────────────────────────────────────────────╮
73
- │ -h, --help show this help message and exit │
74
- ╰───────────────────────────────────────────────────────────────────────────────────────────────────────────╯
75
- ╭─ subcommands ─────────────────────────────────────────────────────────────────────────────────────────────╮
76
- │ (required) │
77
- │ • backup Backup the source directory to the destination directory using hard links for deduplication. │
78
- │ • version Print version and exit │
79
- ╰───────────────────────────────────────────────────────────────────────────────────────────────────────────╯
80
- ```
81
- [comment]: <> (✂✂✂ auto generated main help end ✂✂✂)
82
-
83
-
84
- ### update
85
-
86
- If you use pipx, just call:
87
- ```bash
88
- pipx upgrade PyHardLinkBackup
89
- ```
90
- see: https://pipx.pypa.io/stable/docs/#pipx-upgrade
26
+ **WIP:** v1.0.0 is a complete rewrite of PyHardLinkBackup. The new version is not usable yet!
91
27
 
92
28
 
93
29
  ## concept
@@ -147,18 +83,70 @@ e.g.: hash like `abcdef123...` stored in: `{destination}/.phlb/hash-lookup/ab/cd
147
83
  The file contains only the relative path to the first hardlink of this file content.
148
84
 
149
85
 
150
- ## start development
86
+ ## CLI - backup command
87
+
88
+ The main command is `backup`:
89
+
90
+ [comment]: <> (✂✂✂ auto generated backup help start ✂✂✂)
91
+ ```
92
+ usage: ./cli.py backup [-h] source destination [--excludes STR|{[STR [STR ...]]}] [-v]
93
+
94
+ Backup the source directory to the destination directory using hard links for deduplication.
151
95
 
152
- ```bash
153
- ~$ git clone https://github.com/jedie/PyHardLinkBackup.git
154
- ~$ cd PyHardLinkBackup
155
- ~/PyHardLinkBackup$ ./cli.py --help
156
- ~/PyHardLinkBackup$ ./dev-cli.py --help
96
+ ╭─ positional arguments ───────────────────────────────────────────────────────────────────────────────────────────────╮
97
+ source Source directory to back up. (required) │
98
+ destination Destination directory for the backup. (required) │
99
+ ╰──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯
100
+ ╭─ options ────────────────────────────────────────────────────────────────────────────────────────────────────────────╮
101
+ │ -h, --help show this help message and exit │
102
+ │ --excludes STR|{[STR [STR ...]]} │
103
+ │ List of directory or file names to exclude from backup. (default: __pycache__ .cache .temp .tmp .tox │
104
+ │ .nox) │
105
+ │ -v, --verbosity │
106
+ │ Verbosity level; e.g.: -v, -vv, -vvv, etc. (repeatable) │
107
+ ╰──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯
157
108
  ```
109
+ [comment]: <> (✂✂✂ auto generated backup help end ✂✂✂)
110
+
111
+
112
+ ## CLI - main app help
113
+
114
+ [comment]: <> (✂✂✂ auto generated main help start ✂✂✂)
115
+ ```
116
+ usage: ./cli.py [-h] {backup,benchmark-hashes,version}
117
+
118
+
119
+
120
+ ╭─ options ────────────────────────────────────────────────────────────────────────────────────────────────────────────╮
121
+ │ -h, --help show this help message and exit │
122
+ ╰──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯
123
+ ╭─ subcommands ────────────────────────────────────────────────────────────────────────────────────────────────────────╮
124
+ │ (required) │
125
+ │ • backup Backup the source directory to the destination directory using hard links for deduplication. │
126
+ │ • benchmark-hashes Benchmark different file hashing algorithms on the given path Example output: │
127
+ │ │
128
+ │ Total files hashed: 220, total size: 1187.7 MiB │
129
+ │ │
130
+ │ Results: Total file content read time: 1.7817s │
131
+ │ │
132
+ │ sha1 | Total: 0.6827s | 0.4x hash/read sha256 | Total: 0.7189s | 0.4x hash/read │
133
+ │ sha224 | Total: 0.7375s | 0.4x hash/read sha384 | Total: 1.6552s | 0.9x hash/read │
134
+ │ blake2b | Total: 1.6708s | 0.9x hash/read md5 | Total: 1.6870s | 0.9x hash/read │
135
+ │ sha512 | Total: 1.7269s | 1.0x hash/read shake_128 | Total: 1.9834s | 1.1x hash/read │
136
+ │ sha3_224 | Total: 2.3006s | 1.3x hash/read sha3_256 | Total: 2.3856s | 1.3x hash/read │
137
+ │ shake_256 | Total: 2.4375s | 1.4x hash/read blake2s | Total: 2.5219s | 1.4x hash/read │
138
+ │ sha3_384 | Total: 3.2596s | 1.8x hash/read sha3_512 | Total: 4.5328s | 2.5x hash/read │
139
+ │ • version Print version and exit │
140
+ ╰──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯
141
+ ```
142
+ [comment]: <> (✂✂✂ auto generated main help end ✂✂✂)
143
+
144
+
145
+ ## dev CLI
158
146
 
159
147
  [comment]: <> (✂✂✂ auto generated dev help start ✂✂✂)
160
148
  ```
161
- usage: ./dev-cli.py [-h] {benchmark-hashes,coverage,install,lint,mypy,nox,pip-audit,publish,shell-completion,test,update,update-readme-history,update-test-snapshot-files,version}
149
+ usage: ./dev-cli.py [-h] {coverage,install,lint,mypy,nox,pip-audit,publish,shell-completion,test,update,update-readme-history,update-test-snapshot-files,version}
162
150
 
163
151
 
164
152
 
@@ -167,8 +155,6 @@ usage: ./dev-cli.py [-h] {benchmark-hashes,coverage,install,lint,mypy,nox,pip-au
167
155
  ╰──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯
168
156
  ╭─ subcommands ────────────────────────────────────────────────────────────────────────────────────────────────────────╮
169
157
  │ (required) │
170
- │ • benchmark-hashes │
171
- │ Benchmark different file hashing algorithms on the given path. │
172
158
  │ • coverage Run tests and show coverage report. │
173
159
  │ • install Install requirements and 'PyHardLinkBackup' via pip as editable. │
174
160
  │ • lint Check/fix code style by run: "ruff check --fix" │
@@ -204,14 +190,7 @@ v1 is a complete rewrite of PyHardLinkBackup.
204
190
 
205
191
  [comment]: <> (✂✂✂ auto generated history start ✂✂✂)
206
192
 
207
- * [v1.0.0](https://github.com/jedie/PyHardLinkBackup/compare/v0.13.0...v1.0.0)
208
- * 2026-01-13 - Change "./cli.py" to "phlb" (because it's the name installed via pipx)
209
- * 2026-01-13 - Update README
210
- * 2026-01-13 - Fix benchmark moved to dev CLI ;)
211
- * 2026-01-13 - Remove tyro warning
212
- * 2026-01-13 - Move "benchmark_hashes" from app to dev cli (It's more for testing)
213
- * 2026-01-13 - Rename [project.scripts] hooks
214
- * 2026-01-13 - Add DocWrite, handle broken symlinks, keep file meta, handle missing hardlink sources
193
+ * [v1.0.0rc0](https://github.com/jedie/PyHardLinkBackup/compare/v0.13.0...v1.0.0rc0)
215
194
  * 2026-01-12 - First working iteration with rich progress bar
216
195
  * 2026-01-08 - Rewrite everything
217
196
  * [v0.13.0](https://github.com/jedie/PyHardLinkBackup/compare/v0.12.3...v0.13.0)
@@ -1,7 +1,8 @@
1
- """DocWrite: README.md # PyHardLinkBackup
1
+ """
2
+ PyHardLinkBackup
2
3
  HardLink/Deduplication Backups with Python
3
4
  """
4
5
 
5
6
  # See https://packaging.python.org/en/latest/specifications/version-specifiers/
6
- __version__ = '1.0.0'
7
+ __version__ = '1.0.0rc0'
7
8
  __author__ = 'Jens Diemer <PyHardLinkBackup@jensdiemer.de>'
@@ -84,22 +84,7 @@ def backup_tree(*, src_root: Path, backup_root: Path, excludes: set[str]) -> Bac
84
84
  next_update = 0
85
85
  for entry in iter_scandir_files(src_root, excludes=excludes):
86
86
  backup_count += 1
87
- src_path = Path(entry.path)
88
-
89
- dst_path = backup_dir / src_path.relative_to(src_root)
90
- dst_dir_path = dst_path.parent
91
- if not dst_dir_path.exists():
92
- dst_dir_path.mkdir(parents=True, exist_ok=False)
93
-
94
- try:
95
- size = entry.stat().st_size
96
- except FileNotFoundError:
97
- # e.g.: Handle broken symlink
98
- target = os.readlink(src_path)
99
- dst_path.symlink_to(target)
100
- symlink_files += 1
101
- continue
102
-
87
+ size = entry.stat().st_size
103
88
  backup_size += size
104
89
 
105
90
  now = time.monotonic()
@@ -107,8 +92,14 @@ def backup_tree(*, src_root: Path, backup_root: Path, excludes: set[str]) -> Bac
107
92
  progress.update(backup_count=backup_count, backup_size=backup_size)
108
93
  next_update = now + 0.5
109
94
 
95
+ src_path = Path(entry.path)
96
+ dst_path = backup_dir / src_path.relative_to(src_root)
97
+
98
+ dst_path.parent.mkdir(parents=True, exist_ok=True)
99
+
110
100
  if entry.is_symlink():
111
101
  logger.debug('Copy symlink: %s to %s', src_path, dst_path)
102
+ # Copy symlinks as-is
112
103
  target = os.readlink(src_path)
113
104
  dst_path.symlink_to(target)
114
105
  symlink_files += 1
@@ -161,12 +152,10 @@ def backup_tree(*, src_root: Path, backup_root: Path, excludes: set[str]) -> Bac
161
152
  hardlinked_size += size
162
153
  else:
163
154
  logger.info('Copy unique file: %s to %s', src_path, dst_path)
155
+ shutil.copy2(src_path, dst_path)
164
156
  hash_db[file_hash] = dst_path
165
157
  copied_files += 1
166
158
  copied_size += size
167
-
168
- # Keep original file metadata (permission bits, time stamps, and flags)
169
- shutil.copy2(src_path, dst_path)
170
159
  else:
171
160
  # No file with this size has been backed up before -> can't be a duplicate -> copy and hash
172
161
  file_hash = copy_and_hash(src_path, dst_path)
@@ -175,7 +164,6 @@ def backup_tree(*, src_root: Path, backup_root: Path, excludes: set[str]) -> Bac
175
164
  copied_files += 1
176
165
  copied_size += size
177
166
 
178
- # Finalize progress indicator values:
179
167
  progress.update(backup_count=backup_count, backup_size=backup_size)
180
168
 
181
169
  print(f'\nBackup complete: {backup_dir} (total size {human_filesize(backup_size)})\n')
@@ -9,7 +9,7 @@ from cli_base.cli_tools.verbosity import setup_logging
9
9
  from cli_base.tyro_commands import TyroVerbosityArgType
10
10
  from rich import print # noqa
11
11
 
12
- from PyHardLinkBackup.cli_dev import app
12
+ from PyHardLinkBackup.cli_app import app
13
13
  from PyHardLinkBackup.utilities.filesystem import iter_scandir_files
14
14
 
15
15
 
@@ -26,29 +26,30 @@ def benchmark_hashes(
26
26
  verbosity: TyroVerbosityArgType = 1,
27
27
  ) -> None:
28
28
  """
29
- Benchmark different file hashing algorithms on the given path.
29
+ Benchmark different file hashing algorithms on the given path
30
+
31
+ Example output:
32
+
33
+ Total files hashed: 220, total size: 1187.7 MiB
34
+
35
+ Results:
36
+ Total file content read time: 1.7817s
37
+
38
+ sha1 | Total: 0.6827s | 0.4x hash/read
39
+ sha256 | Total: 0.7189s | 0.4x hash/read
40
+ sha224 | Total: 0.7375s | 0.4x hash/read
41
+ sha384 | Total: 1.6552s | 0.9x hash/read
42
+ blake2b | Total: 1.6708s | 0.9x hash/read
43
+ md5 | Total: 1.6870s | 0.9x hash/read
44
+ sha512 | Total: 1.7269s | 1.0x hash/read
45
+ shake_128 | Total: 1.9834s | 1.1x hash/read
46
+ sha3_224 | Total: 2.3006s | 1.3x hash/read
47
+ sha3_256 | Total: 2.3856s | 1.3x hash/read
48
+ shake_256 | Total: 2.4375s | 1.4x hash/read
49
+ blake2s | Total: 2.5219s | 1.4x hash/read
50
+ sha3_384 | Total: 3.2596s | 1.8x hash/read
51
+ sha3_512 | Total: 4.5328s | 2.5x hash/read
30
52
  """
31
- # Example output:
32
- #
33
- # Total files hashed: 220, total size: 1187.7 MiB
34
- #
35
- # Results:
36
- # Total file content read time: 1.7817s
37
- #
38
- # sha1 | Total: 0.6827s | 0.4x hash/read
39
- # sha256 | Total: 0.7189s | 0.4x hash/read
40
- # sha224 | Total: 0.7375s | 0.4x hash/read
41
- # sha384 | Total: 1.6552s | 0.9x hash/read
42
- # blake2b | Total: 1.6708s | 0.9x hash/read
43
- # md5 | Total: 1.6870s | 0.9x hash/read
44
- # sha512 | Total: 1.7269s | 1.0x hash/read
45
- # shake_128 | Total: 1.9834s | 1.1x hash/read
46
- # sha3_224 | Total: 2.3006s | 1.3x hash/read
47
- # sha3_256 | Total: 2.3856s | 1.3x hash/read
48
- # shake_256 | Total: 2.4375s | 1.4x hash/read
49
- # blake2s | Total: 2.5219s | 1.4x hash/read
50
- # sha3_384 | Total: 3.2596s | 1.8x hash/read
51
- # sha3_512 | Total: 4.5328s | 2.5x hash/read
52
53
  setup_logging(verbosity=verbosity)
53
54
  assert_is_dir(base_path)
54
55
  print(f'Benchmarking file hashes under: {base_path}')
@@ -32,7 +32,7 @@ def backup(
32
32
  ],
33
33
  /,
34
34
  excludes: Annotated[
35
- tuple,
35
+ tuple[str],
36
36
  tyro.conf.arg(
37
37
  help='List of directory or file names to exclude from backup.',
38
38
  ),
@@ -0,0 +1,188 @@
1
+ import os
2
+ import tempfile
3
+ import textwrap
4
+ import zlib
5
+ from pathlib import Path
6
+ from unittest import TestCase
7
+
8
+ from bx_py_utils.test_utils.assertion import assert_text_equal
9
+ from freezegun import freeze_time
10
+
11
+ from PyHardLinkBackup.backup import BackupResult, backup_tree
12
+ from PyHardLinkBackup.constants import CHUNK_SIZE
13
+ from PyHardLinkBackup.utilities.file_size_database import FileSizeDatabase
14
+ from PyHardLinkBackup.utilities.filesystem import iter_scandir_files
15
+
16
+
17
+ def fs_tree_overview(root: Path) -> str:
18
+ lines = []
19
+ for entry in iter_scandir_files(root, excludes=set()):
20
+ file_path = Path(entry.path)
21
+ crc32 = zlib.crc32(file_path.read_bytes())
22
+ rel_path = file_path.relative_to(root)
23
+
24
+ nlink = entry.stat().st_nlink
25
+ if entry.is_symlink():
26
+ file_type = 'symlink'
27
+ elif nlink > 1:
28
+ file_type = 'hardlink'
29
+ else:
30
+ file_type = 'file'
31
+
32
+ lines.append(
33
+ f'{str(rel_path):<20} | {file_type:<8} | {nlink=} | {entry.stat().st_size:>8} Bytes | crc32: {crc32:08x}'
34
+ )
35
+ return '\n'.join(sorted(lines))
36
+
37
+
38
+ def assert_fs_tree_overview(root: Path, expected_overview: str):
39
+ expected_overview = textwrap.dedent(expected_overview).strip()
40
+ actual_overview = fs_tree_overview(root)
41
+ assert_text_equal(
42
+ actual_overview,
43
+ expected_overview,
44
+ msg=f'Filesystem tree overview does not match expected overview.\n\n{actual_overview}\n\n',
45
+ )
46
+
47
+
48
+ class BackupTreeTestCase(TestCase):
49
+ def test_happy_path(self):
50
+ with tempfile.TemporaryDirectory() as temp_dir:
51
+ temp_path = Path(temp_dir)
52
+
53
+ src_root = temp_path / 'source'
54
+ backup_root = temp_path / 'backup'
55
+
56
+ src_root.mkdir()
57
+ backup_root.mkdir()
58
+
59
+ file1_path = src_root / 'file2.txt'
60
+ file1_path.write_text('This is file 1')
61
+
62
+ (src_root / 'symlink2file1').symlink_to(file1_path)
63
+ os.link(file1_path, src_root / 'hardlink2file1')
64
+
65
+ sub_dir = src_root / 'subdir'
66
+ sub_dir.mkdir()
67
+ (sub_dir / 'file.txt').write_text('This is file in subdir')
68
+
69
+ # Only files with a size of at least MIN_SIZE will be considered for hardlinking:
70
+ size_db_min_file = src_root / 'min_sized_file1.bin'
71
+ size_db_min_file.write_bytes(b'X' * FileSizeDatabase.MIN_SIZE)
72
+
73
+ # Same content and big enough to be considered for hardlinking:
74
+ size_db_min_file = src_root / 'min_sized_file2.bin'
75
+ size_db_min_file.write_bytes(b'X' * FileSizeDatabase.MIN_SIZE)
76
+
77
+ # Files larger than CHUNK_SIZE will be handled differently:
78
+ large_file = src_root / 'large_file.bin'
79
+ large_file.write_bytes(b'Y' * (CHUNK_SIZE + 1))
80
+
81
+ excluded_dir = src_root / '.cache'
82
+ excluded_dir.mkdir()
83
+ (excluded_dir / 'tempfile.tmp').write_text('Temporary file that should be excluded')
84
+
85
+ #######################################################################################
86
+ # Create first backup:
87
+
88
+ with freeze_time('2026-01-01T12:34:56Z', auto_tick_seconds=0):
89
+ result = backup_tree(
90
+ src_root=src_root,
91
+ backup_root=backup_root,
92
+ excludes={'.cache'},
93
+ )
94
+ backup_dir = result.backup_dir
95
+ self.assertEqual(
96
+ str(Path(backup_dir).relative_to(temp_path)),
97
+ 'backup/source/20260101_123456',
98
+ )
99
+ self.assertEqual(
100
+ result,
101
+ BackupResult(
102
+ backup_dir=backup_dir,
103
+ backup_count=7,
104
+ backup_size=67110929,
105
+ symlink_files=1,
106
+ hardlinked_files=1,
107
+ hardlinked_size=1000,
108
+ copied_files=5,
109
+ copied_size=67109915,
110
+ copied_small_files=3,
111
+ copied_small_size=50,
112
+ ),
113
+ )
114
+
115
+ # The sources:
116
+ assert_fs_tree_overview(
117
+ root=src_root,
118
+ expected_overview="""
119
+ .cache/tempfile.tmp | file | nlink=1 | 38 Bytes | crc32: 41d7a2c9
120
+ file2.txt | hardlink | nlink=2 | 14 Bytes | crc32: 8a11514a
121
+ hardlink2file1 | hardlink | nlink=2 | 14 Bytes | crc32: 8a11514a
122
+ large_file.bin | file | nlink=1 | 67108865 Bytes | crc32: 9671eaac
123
+ min_sized_file1.bin | file | nlink=1 | 1000 Bytes | crc32: f0d93de4
124
+ min_sized_file2.bin | file | nlink=1 | 1000 Bytes | crc32: f0d93de4
125
+ subdir/file.txt | file | nlink=1 | 22 Bytes | crc32: c0167e63
126
+ symlink2file1 | symlink | nlink=2 | 14 Bytes | crc32: 8a11514a
127
+ """,
128
+ )
129
+ # The backup:
130
+ # * /.cache/ -> excluded
131
+ # * min_sized_file1.bin and min_sized_file2.bin -> hardlinked
132
+ assert_fs_tree_overview(
133
+ root=backup_dir,
134
+ expected_overview="""
135
+ file2.txt | file | nlink=1 | 14 Bytes | crc32: 8a11514a
136
+ hardlink2file1 | file | nlink=1 | 14 Bytes | crc32: 8a11514a
137
+ large_file.bin | file | nlink=1 | 67108865 Bytes | crc32: 9671eaac
138
+ min_sized_file1.bin | hardlink | nlink=2 | 1000 Bytes | crc32: f0d93de4
139
+ min_sized_file2.bin | hardlink | nlink=2 | 1000 Bytes | crc32: f0d93de4
140
+ subdir/file.txt | file | nlink=1 | 22 Bytes | crc32: c0167e63
141
+ symlink2file1 | symlink | nlink=2 | 14 Bytes | crc32: 8a11514a
142
+ """,
143
+ )
144
+
145
+ #######################################################################################
146
+ # Just backup again:
147
+
148
+ with freeze_time('2026-01-02T12:34:56Z', auto_tick_seconds=0):
149
+ result = backup_tree(
150
+ src_root=src_root,
151
+ backup_root=backup_root,
152
+ excludes={'.cache'},
153
+ )
154
+ backup_dir = result.backup_dir
155
+ self.assertEqual(
156
+ str(Path(backup_dir).relative_to(temp_path)),
157
+ 'backup/source/20260102_123456',
158
+ )
159
+ self.assertEqual(
160
+ result,
161
+ BackupResult(
162
+ backup_dir=backup_dir,
163
+ backup_count=7,
164
+ backup_size=67110929,
165
+ symlink_files=1,
166
+ hardlinked_files=3, # <<< More hardlinks this time!
167
+ hardlinked_size=67110865,
168
+ copied_files=3,
169
+ copied_size=50,
170
+ copied_small_files=3,
171
+ copied_small_size=50,
172
+ ),
173
+ )
174
+ # The second backup:
175
+ # * /.cache/ -> excluded
176
+ # * min_sized_file1.bin and min_sized_file2.bin -> hardlinked
177
+ assert_fs_tree_overview(
178
+ root=backup_dir,
179
+ expected_overview="""
180
+ file2.txt | file | nlink=1 | 14 Bytes | crc32: 8a11514a
181
+ hardlink2file1 | file | nlink=1 | 14 Bytes | crc32: 8a11514a
182
+ large_file.bin | hardlink | nlink=2 | 67108865 Bytes | crc32: 9671eaac
183
+ min_sized_file1.bin | hardlink | nlink=4 | 1000 Bytes | crc32: f0d93de4
184
+ min_sized_file2.bin | hardlink | nlink=4 | 1000 Bytes | crc32: f0d93de4
185
+ subdir/file.txt | file | nlink=1 | 22 Bytes | crc32: c0167e63
186
+ symlink2file1 | symlink | nlink=2 | 14 Bytes | crc32: 8a11514a
187
+ """,
188
+ )
@@ -26,7 +26,6 @@ class ReadmeTestCase(BaseTestCase):
26
26
  def test_main_help(self):
27
27
  with NoColorEnvRich():
28
28
  stdout = invoke(cli_bin=PACKAGE_ROOT / 'cli.py', args=['--help'], strip_line_prefix='usage: ')
29
-
30
29
  self.assert_in_content(
31
30
  got=stdout,
32
31
  parts=(
@@ -36,10 +35,6 @@ class ReadmeTestCase(BaseTestCase):
36
35
  constants.CLI_EPILOG,
37
36
  ),
38
37
  )
39
-
40
- # Installed via pipx is called 'phlb', not 'cli.py':
41
- stdout = stdout.replace('./cli.py', 'phlb')
42
-
43
38
  assert_cli_help_in_readme(text_block=stdout, marker='main help')
44
39
 
45
40
  def test_backup_help(self):
@@ -52,10 +47,6 @@ class ReadmeTestCase(BaseTestCase):
52
47
  'Backup the source directory to the destination',
53
48
  ),
54
49
  )
55
-
56
- # Installed via pipx is called 'phlb', not 'cli.py':
57
- stdout = stdout.replace('./cli.py', 'phlb')
58
-
59
50
  assert_cli_help_in_readme(text_block=stdout, marker='backup help')
60
51
 
61
52
  def test_dev_help(self):
@@ -1,4 +1,3 @@
1
- import logging
2
1
  from pathlib import Path
3
2
 
4
3
 
@@ -7,8 +6,8 @@ class HashAlreadyExistsError(ValueError):
7
6
 
8
7
 
9
8
  class FileHashDatabase:
10
- """DocWrite: README.md ## FileHashDatabase
11
- A simple "database" to store file content hash <-> relative path mappings.
9
+ """
10
+ A simple database to store file content hash <-> relative path mappings.
12
11
  Uses a directory structure to avoid too many files in a single directory.
13
12
  Path structure:
14
13
  {base_dst}/.phlb/hash-lookup/{XX}/{YY}/{hash}
@@ -40,10 +39,6 @@ class FileHashDatabase:
40
39
  return None
41
40
  else:
42
41
  abs_file_path = self.backup_root / rel_file_path
43
- if not abs_file_path.is_file():
44
- logging.warning('Hash database entry found, but file does not exist: %s', abs_file_path)
45
- hash_path.unlink()
46
- return None
47
42
  return abs_file_path
48
43
 
49
44
  def __setitem__(self, hash: str, abs_file_path: Path):
@@ -2,11 +2,17 @@ from pathlib import Path
2
2
 
3
3
 
4
4
  class FileSizeDatabase:
5
- """DocWrite: README.md ## FileSizeDatabase
6
- A simple "database" to track which file sizes have been seen.
7
-
5
+ """
6
+ A simple database to track which file sizes have been seen.
8
7
  Uses a directory structure to avoid too many files in a single directory.
9
- We don't "cache" anything in Memory, to avoid high memory consumption for large datasets.
8
+
9
+ Path structure:
10
+ {base_dst}/.phlb/size-lookup/{XX}/{YY}/{size}
11
+ e.g.:
12
+ 1234567890 results in: {base_dst}/.phlb/size-lookup/12/34/1234567890
13
+
14
+ Notes:
15
+ * We don't "cache" anything in Memory, to avoid high memory consumption for large datasets.
10
16
  """
11
17
 
12
18
  MIN_SIZE = 1000 # no padding is made, so the min size is 1000 bytes!
@@ -18,15 +24,6 @@ class FileSizeDatabase:
18
24
  def _get_size_path(self, size: int) -> Path:
19
25
  assert size >= self.MIN_SIZE, f'Size must be at least {self.MIN_SIZE} bytes'
20
26
  size_str = str(size)
21
-
22
- """DocWrite: README.md ## FileSizeDatabase
23
- Path structure:
24
- * `{base_dst}/.phlb/size-lookup/{XX}/{YY}/{size}`
25
-
26
- e.g.:
27
-
28
- * `1234567890` results in: `{base_dst}/.phlb/size-lookup/12/34/1234567890`
29
- """
30
27
  first_dir_name = size_str[:2]
31
28
  second_dir_name = size_str[2:4]
32
29
  size_path = self.base_path / first_dir_name / second_dir_name / size_str
@@ -40,7 +37,4 @@ class FileSizeDatabase:
40
37
  size_path = self._get_size_path(size)
41
38
  if not size_path.exists():
42
39
  size_path.parent.mkdir(parents=True, exist_ok=True)
43
-
44
- """DocWrite: README.md ## FileSizeDatabase
45
- All files are created empty, as we only care about their existence."""
46
40
  size_path.touch(exist_ok=False)