PyHardLinkBackup 1.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- PyHardLinkBackup/__init__.py +7 -0
- PyHardLinkBackup/__main__.py +10 -0
- PyHardLinkBackup/backup.py +203 -0
- PyHardLinkBackup/cli_app/__init__.py +41 -0
- PyHardLinkBackup/cli_app/phlb.py +50 -0
- PyHardLinkBackup/cli_dev/__init__.py +70 -0
- PyHardLinkBackup/cli_dev/benchmark.py +119 -0
- PyHardLinkBackup/cli_dev/code_style.py +12 -0
- PyHardLinkBackup/cli_dev/packaging.py +65 -0
- PyHardLinkBackup/cli_dev/shell_completion.py +23 -0
- PyHardLinkBackup/cli_dev/testing.py +52 -0
- PyHardLinkBackup/cli_dev/update_readme_history.py +33 -0
- PyHardLinkBackup/constants.py +16 -0
- PyHardLinkBackup/tests/__init__.py +36 -0
- PyHardLinkBackup/tests/test_backup.py +399 -0
- PyHardLinkBackup/tests/test_doc_write.py +25 -0
- PyHardLinkBackup/tests/test_doctests.py +10 -0
- PyHardLinkBackup/tests/test_project_setup.py +46 -0
- PyHardLinkBackup/tests/test_readme.py +75 -0
- PyHardLinkBackup/tests/test_readme_history.py +8 -0
- PyHardLinkBackup/utilities/__init__.py +0 -0
- PyHardLinkBackup/utilities/file_hash_database.py +58 -0
- PyHardLinkBackup/utilities/file_size_database.py +46 -0
- PyHardLinkBackup/utilities/filesystem.py +133 -0
- PyHardLinkBackup/utilities/humanize.py +22 -0
- PyHardLinkBackup/utilities/rich_utils.py +98 -0
- PyHardLinkBackup/utilities/tests/__init__.py +0 -0
- PyHardLinkBackup/utilities/tests/test_file_hash_database.py +134 -0
- PyHardLinkBackup/utilities/tests/test_file_size_database.py +131 -0
- PyHardLinkBackup/utilities/tests/test_filesystem.py +94 -0
- pyhardlinkbackup-1.0.0.dist-info/METADATA +547 -0
- pyhardlinkbackup-1.0.0.dist-info/RECORD +34 -0
- pyhardlinkbackup-1.0.0.dist-info/WHEEL +4 -0
- pyhardlinkbackup-1.0.0.dist-info/entry_points.txt +3 -0
|
@@ -0,0 +1,203 @@
|
|
|
1
|
+
import dataclasses
|
|
2
|
+
import logging
|
|
3
|
+
import os
|
|
4
|
+
import shutil
|
|
5
|
+
import sys
|
|
6
|
+
import time
|
|
7
|
+
from datetime import datetime
|
|
8
|
+
from pathlib import Path
|
|
9
|
+
|
|
10
|
+
from rich import print # noqa
|
|
11
|
+
|
|
12
|
+
from PyHardLinkBackup.constants import CHUNK_SIZE
|
|
13
|
+
from PyHardLinkBackup.utilities.file_hash_database import FileHashDatabase
|
|
14
|
+
from PyHardLinkBackup.utilities.file_size_database import FileSizeDatabase
|
|
15
|
+
from PyHardLinkBackup.utilities.filesystem import (
|
|
16
|
+
copy_and_hash,
|
|
17
|
+
hash_file,
|
|
18
|
+
humanized_fs_scan,
|
|
19
|
+
iter_scandir_files,
|
|
20
|
+
read_and_hash_file,
|
|
21
|
+
)
|
|
22
|
+
from PyHardLinkBackup.utilities.humanize import human_filesize
|
|
23
|
+
from PyHardLinkBackup.utilities.rich_utils import BackupProgress
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
logger = logging.getLogger(__name__)
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
@dataclasses.dataclass
class BackupResult:
    """Summary statistics of one backup run, returned by backup_tree()."""

    backup_dir: Path  # The created, timestamped destination directory
    backup_count: int  # Total number of source files processed
    backup_size: int  # Total size in bytes of all processed files (symlink targets included)
    symlink_files: int  # Number of symlinks recreated (including broken ones)
    hardlinked_files: int  # Number of duplicate files stored as hard links
    hardlinked_size: int  # Bytes saved by hard linking instead of copying
    copied_files: int  # Number of files physically copied into the backup
    copied_size: int  # Bytes physically copied
    copied_small_files: int  # Subset of copied files below the deduplication size threshold
    copied_small_size: int  # Bytes of those small copied files
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
def backup_tree(*, src_root: Path, backup_root: Path, excludes: set[str]) -> BackupResult:
    """
    Backup `src_root` into a new timestamped directory below `backup_root`.

    Deduplication strategy:
     * Files smaller than FileSizeDatabase.MIN_SIZE are always copied.
     * A file whose size was never seen before cannot be a duplicate -> copy and hash it.
     * Otherwise the file is hashed and, if the hash is known, hard-linked to the
       already-stored backup file instead of copied.

    Symlinks (including broken ones) are recreated as symlinks and never followed.

    Exits the process with code 1 if the source or backup directory doesn't exist.
    """
    src_root = src_root.resolve()
    if not src_root.is_dir():
        print('Error: Source directory does not exist!')
        print(f'Please check source directory: "{src_root}"\n')
        sys.exit(1)

    backup_root = backup_root.resolve()
    if not backup_root.is_dir():
        print('Error: Backup directory does not exist!')
        print(f'Please create "{backup_root}" directory first and start again!\n')
        sys.exit(1)

    # Step 1: Scan source directory (for the progress indicator totals):
    src_file_count, src_total_size = humanized_fs_scan(src_root, excludes)

    phlb_conf_dir = backup_root / '.phlb'
    phlb_conf_dir.mkdir(parents=False, exist_ok=True)

    backup_dir = backup_root / src_root.name / datetime.now().strftime('%Y%m%d_%H%M%S')
    logger.info('Backup %s to %s', src_root, backup_dir)
    backup_dir.mkdir(parents=True, exist_ok=False)

    print(f'\nBackup to {backup_dir}...\n')

    with BackupProgress(src_file_count, src_total_size) as progress:
        # "Databases" for deduplication
        size_db = FileSizeDatabase(phlb_conf_dir)
        hash_db = FileHashDatabase(backup_root, phlb_conf_dir)

        backup_count = 0
        backup_size = 0

        symlink_files = 0
        hardlinked_files = 0
        hardlinked_size = 0
        copied_files = 0
        copied_size = 0
        copied_small_files = 0
        copied_small_size = 0

        next_update = 0
        for entry in iter_scandir_files(src_root, excludes=excludes):
            backup_count += 1
            src_path = Path(entry.path)

            dst_path = backup_dir / src_path.relative_to(src_root)
            dst_dir_path = dst_path.parent
            if not dst_dir_path.exists():
                dst_dir_path.mkdir(parents=True, exist_ok=False)

            try:
                size = entry.stat().st_size
            except FileNotFoundError:
                # e.g.: Handle broken symlink -> recreate it and continue
                target = os.readlink(src_path)
                dst_path.symlink_to(target)
                symlink_files += 1
                continue

            backup_size += size

            # Throttle the progress display to max. one update per 0.5 sec:
            now = time.monotonic()
            if now >= next_update:
                progress.update(backup_count=backup_count, backup_size=backup_size)
                next_update = now + 0.5

            if entry.is_symlink():
                logger.debug('Copy symlink: %s to %s', src_path, dst_path)
                target = os.readlink(src_path)
                dst_path.symlink_to(target)
                symlink_files += 1
                continue

            # Process regular files
            assert entry.is_file(follow_symlinks=False), f'Unexpected non-file: {src_path}'

            # Deduplication logic

            if size < size_db.MIN_SIZE:
                # Small file -> always copy without deduplication
                logger.info('Copy small file: %s to %s', src_path, dst_path)
                shutil.copy2(src_path, dst_path)
                copied_files += 1
                copied_size += size
                copied_small_files += 1
                copied_small_size += size
                continue

            if size in size_db:
                logger.debug('File with size %iBytes found before -> hash: %s', size, src_path)

                if size <= CHUNK_SIZE:
                    # File can be read complete into memory
                    logger.debug(
                        'File size %iBytes <= CHUNK_SIZE (%iBytes) -> read complete into memory', size, CHUNK_SIZE
                    )
                    file_content, file_hash = read_and_hash_file(src_path)
                    if existing_path := hash_db.get(file_hash):
                        logger.info('Hardlink duplicate file: %s to %s', dst_path, existing_path)
                        os.link(existing_path, dst_path)
                        hardlinked_files += 1
                        hardlinked_size += size
                    else:
                        logger.info('Store unique file: %s to %s', src_path, dst_path)
                        dst_path.write_bytes(file_content)
                        # Bugfix: also keep the original file metadata here,
                        # consistent with the "large file" code path below:
                        shutil.copystat(src_path, dst_path)
                        hash_db[file_hash] = dst_path
                        copied_files += 1
                        copied_size += size

                else:
                    # Large file
                    file_hash = hash_file(src_path)  # Calculate hash without copying

                    if existing_path := hash_db.get(file_hash):
                        logger.info('Hardlink duplicate file: %s to %s', dst_path, existing_path)
                        os.link(existing_path, dst_path)
                        hardlinked_files += 1
                        hardlinked_size += size
                    else:
                        logger.info('Copy unique file: %s to %s', src_path, dst_path)
                        hash_db[file_hash] = dst_path
                        copied_files += 1
                        copied_size += size

                        # Keep original file metadata (permission bits, time stamps, and flags)
                        shutil.copy2(src_path, dst_path)
            else:
                # A file with this size was not backed up before -> can't be a duplicate -> copy and hash
                file_hash = copy_and_hash(src_path, dst_path)
                size_db.add(size)
                hash_db[file_hash] = dst_path
                copied_files += 1
                copied_size += size

        # Finalize progress indicator values:
        progress.update(backup_count=backup_count, backup_size=backup_size)

    print(f'\nBackup complete: {backup_dir} (total size {human_filesize(backup_size)})\n')
    print(f' Total files processed: {backup_count}')
    print(f' * Symlinked files: {symlink_files}')
    print(f' * Hardlinked files: {hardlinked_files} (saved {human_filesize(hardlinked_size)})')
    print(f' * Copied files: {copied_files} (total {human_filesize(copied_size)})')
    print(
        f' of which small (<{size_db.MIN_SIZE} Bytes) files: {copied_small_files}'
        f' (total {human_filesize(copied_small_size)})'
    )
    print()

    return BackupResult(
        backup_dir=backup_dir,
        backup_count=backup_count,
        backup_size=backup_size,
        symlink_files=symlink_files,
        hardlinked_files=hardlinked_files,
        hardlinked_size=hardlinked_size,
        copied_files=copied_files,
        copied_size=copied_size,
        copied_small_files=copied_small_files,
        copied_small_size=copied_small_size,
    )
|
|
@@ -0,0 +1,41 @@
|
|
|
1
|
+
"""
|
|
2
|
+
CLI for usage
|
|
3
|
+
"""
|
|
4
|
+
|
|
5
|
+
import logging
|
|
6
|
+
import sys
|
|
7
|
+
from collections.abc import Sequence
|
|
8
|
+
|
|
9
|
+
from cli_base.autodiscover import import_all_files
|
|
10
|
+
from cli_base.cli_tools.version_info import print_version
|
|
11
|
+
from rich import print # noqa
|
|
12
|
+
from tyro.extras import SubcommandApp
|
|
13
|
+
|
|
14
|
+
import PyHardLinkBackup
|
|
15
|
+
from PyHardLinkBackup import constants
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
logger = logging.getLogger(__name__)
|
|
19
|
+
|
|
20
|
+
app = SubcommandApp()
|
|
21
|
+
|
|
22
|
+
# Register all CLI commands, just by import all files in this package:
|
|
23
|
+
import_all_files(package=__package__, init_file=__file__)
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
@app.command
def version():
    """Print version and exit"""
    # The version banner is already printed by main() on every invocation,
    # so this pseudo command only has to terminate successfully.
    sys.exit(0)
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
def main(args: Sequence[str] | None = None):
    """CLI entry point: print the version banner, then dispatch to the tyro subcommand app."""
    print_version(PyHardLinkBackup)

    # Hand over to tyro; `args=None` means: parse sys.argv as usual.
    app.cli(
        prog='./cli.py',
        description=constants.CLI_EPILOG,
        use_underscores=False,  # use hyphens instead of underscores
        sort_subcommands=True,
        args=args,
    )
|
|
@@ -0,0 +1,50 @@
|
|
|
1
|
+
import logging
|
|
2
|
+
from pathlib import Path
|
|
3
|
+
from typing import Annotated
|
|
4
|
+
|
|
5
|
+
import tyro
|
|
6
|
+
from cli_base.cli_tools.verbosity import setup_logging
|
|
7
|
+
from cli_base.tyro_commands import TyroVerbosityArgType
|
|
8
|
+
from rich import print # noqa
|
|
9
|
+
|
|
10
|
+
from PyHardLinkBackup.backup import backup_tree
|
|
11
|
+
from PyHardLinkBackup.cli_app import app
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
logger = logging.getLogger(__name__)
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
@app.command
def backup(
    src: Annotated[
        Path,
        tyro.conf.arg(
            metavar='source',
            help='Source directory to back up.',
        ),
    ],
    dst: Annotated[
        Path,
        tyro.conf.arg(
            metavar='destination',
            help='Destination directory for the backup.',
        ),
    ],
    /,  # src/dst are positional-only, so they appear as plain positionals on the command line
    excludes: Annotated[
        tuple,
        tyro.conf.arg(
            help='List of directory or file names to exclude from backup.',
        ),
    ] = ('__pycache__', '.cache', '.temp', '.tmp', '.tox', '.nox'),
    verbosity: TyroVerbosityArgType = 2,
) -> None:
    """
    Backup the source directory to the destination directory using hard links for deduplication.
    """
    setup_logging(verbosity=verbosity)
    # Delegate the actual work; backup_tree() exits with code 1 on missing directories.
    backup_tree(
        src_root=src,
        backup_root=dst,
        excludes=set(excludes),
    )
|
|
@@ -0,0 +1,70 @@
|
|
|
1
|
+
"""
|
|
2
|
+
CLI for development
|
|
3
|
+
"""
|
|
4
|
+
|
|
5
|
+
import importlib
|
|
6
|
+
import logging
|
|
7
|
+
import sys
|
|
8
|
+
from collections.abc import Sequence
|
|
9
|
+
|
|
10
|
+
from bx_py_utils.path import assert_is_file
|
|
11
|
+
from cli_base.autodiscover import import_all_files
|
|
12
|
+
from cli_base.cli_tools.dev_tools import run_coverage, run_nox, run_unittest_cli
|
|
13
|
+
from cli_base.cli_tools.version_info import print_version
|
|
14
|
+
from typeguard import install_import_hook
|
|
15
|
+
from tyro.extras import SubcommandApp
|
|
16
|
+
|
|
17
|
+
import PyHardLinkBackup
|
|
18
|
+
from PyHardLinkBackup import constants
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
# Check type annotations via typeguard in all tests.
|
|
22
|
+
# Sadly we must activate this here and can't do this in ./tests/__init__.py
|
|
23
|
+
install_import_hook(packages=('PyHardLinkBackup',))
|
|
24
|
+
|
|
25
|
+
# reload the module, after the typeguard import hook is activated:
|
|
26
|
+
importlib.reload(PyHardLinkBackup)
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
logger = logging.getLogger(__name__)
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
PACKAGE_ROOT = constants.BASE_PATH.parent
|
|
33
|
+
assert_is_file(PACKAGE_ROOT / 'pyproject.toml') # Exists only in cloned git repo
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
app = SubcommandApp()
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
# Register all CLI commands, just by import all files in this package:
|
|
40
|
+
import_all_files(package=__package__, init_file=__file__)
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
@app.command
def version():
    """Print version and exit"""
    # The version banner is already printed by main() on every invocation,
    # so this pseudo command only has to terminate successfully.
    sys.exit(0)
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
def main(args: Sequence[str] | None = None):
    """
    Dev CLI entry point: print the version banner, shortcut 'test'/'nox'/'coverage'
    directly to their runners, otherwise dispatch to the tyro subcommand app.
    """
    print_version(PyHardLinkBackup)

    # Bugfix: honor an explicitly passed `args` sequence instead of always
    # inspecting sys.argv (the old behavior is kept for args=None):
    argv = sys.argv if args is None else [sys.argv[0], *args]

    if len(argv) >= 2:
        # Check if we can just pass a command call to origin CLI:
        command = argv[1]
        command_map = {
            'test': run_unittest_cli,
            'nox': run_nox,
            'coverage': run_coverage,
        }
        if real_func := command_map.get(command):
            real_func(argv=argv, exit_after_run=True)

    app.cli(
        prog='./dev-cli.py',
        description=constants.CLI_EPILOG,
        use_underscores=False,  # use hyphens instead of underscores
        sort_subcommands=True,
        args=args,
    )
|
|
@@ -0,0 +1,119 @@
|
|
|
1
|
+
import collections
|
|
2
|
+
import hashlib
|
|
3
|
+
import logging
|
|
4
|
+
import time
|
|
5
|
+
from pathlib import Path
|
|
6
|
+
|
|
7
|
+
from bx_py_utils.path import assert_is_dir
|
|
8
|
+
from cli_base.cli_tools.verbosity import setup_logging
|
|
9
|
+
from cli_base.tyro_commands import TyroVerbosityArgType
|
|
10
|
+
from rich import print # noqa
|
|
11
|
+
|
|
12
|
+
from PyHardLinkBackup.cli_dev import app
|
|
13
|
+
from PyHardLinkBackup.utilities.filesystem import iter_scandir_files
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
logger = logging.getLogger(__name__)
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
@app.command
def benchmark_hashes(
    base_path: Path,
    /,
    max_duration: int = 30,  # in seconds
    min_file_size: int = 15 * 1024,  # 15 KiB
    max_file_size: int = 100 * 1024 * 1024,  # 100 MiB
    verbosity: TyroVerbosityArgType = 1,
) -> None:
    """
    Benchmark different file hashing algorithms on the given path.
    """
    # Example output:
    #
    # Total files hashed: 220, total size: 1187.7 MiB
    #
    # Results:
    # Total file content read time: 1.7817s
    #
    # sha1       | Total: 0.6827s | 0.4x hash/read
    # sha256     | Total: 0.7189s | 0.4x hash/read
    # ...
    # sha3_512   | Total: 4.5328s | 2.5x hash/read
    setup_logging(verbosity=verbosity)
    assert_is_dir(base_path)
    print(f'Benchmarking file hashes under: {base_path}')

    print(f'Min file size: {min_file_size} bytes')
    print(f'Max file size: {max_file_size} bytes')
    print(f'Max duration: {max_duration} seconds')

    algorithms = sorted(hashlib.algorithms_guaranteed)
    print(f'\nUsing {len(algorithms)} guaranteed algorithms: {algorithms}')
    print('-' * 80)

    file_count = 0
    total_size = 0
    total_read_time = 0.0
    # Bugfix: collect durations in lists (was: sets) - a set silently drops
    # equal timing values, which undercounts the per-algorithm totals:
    results = collections.defaultdict(list)

    # Use a dedicated name for the overall start so the per-file timers
    # below don't clobber it (the old code had to reconstruct it via
    # `stop_time - max_duration` for the percent display):
    bench_start = time.time()
    stop_time = bench_start + max_duration
    next_update = bench_start + 2

    for dir_entry in iter_scandir_files(base_path):
        entry_stat = dir_entry.stat()
        file_size = entry_stat.st_size
        if not (min_file_size <= file_size <= max_file_size):
            continue

        read_start = time.perf_counter()
        file_content = Path(dir_entry.path).read_bytes()
        total_read_time += time.perf_counter() - read_start

        for algo in algorithms:
            # Actual measurement:
            hash_start = time.perf_counter()
            hashlib.new(algo, file_content)
            results[algo].append(time.perf_counter() - hash_start)

        file_count += 1
        total_size += file_size

        now = time.time()
        if now >= stop_time:
            print('Reached max duration limit, stopping benchmark...')
            break

        if now >= next_update:
            percent = (now - bench_start) / max_duration * 100
            print(
                f'{int(percent)}% Processed {file_count} files so far,'
                f' total size: {total_size / 1024 / 1024:.1f} MiB...'
            )
            next_update = now + 2

    print(f'\nTotal files hashed: {file_count}, total size: {total_size / 1024 / 1024:.1f} MiB')

    print('\nResults:')
    print(f'Total file content read time: {total_read_time:.4f}s\n')

    if not total_read_time:
        # Bugfix: avoid ZeroDivisionError if no file matched the size filters
        print('No files hashed -> nothing to report.')
        return

    sorted_results = sorted(
        ((algo, sum(durations)) for algo, durations in results.items()),
        key=lambda x: x[1],  # Sort by total_duration
    )
    for algo, total_duration in sorted_results:
        ratio = total_duration / total_read_time
        print(f'{algo:10} | Total: {total_duration:.4f}s | {ratio:.1f}x hash/read')
|
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
from cli_base.cli_tools.code_style import assert_code_style
|
|
2
|
+
from cli_base.tyro_commands import TyroVerbosityArgType
|
|
3
|
+
|
|
4
|
+
from PyHardLinkBackup.cli_dev import PACKAGE_ROOT, app
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
@app.command
def lint(verbosity: TyroVerbosityArgType = 1):
    """
    Check/fix code style by run: "ruff check --fix"
    """
    # Any non-zero verbosity turns on verbose output; exits non-zero on style errors.
    is_verbose = bool(verbosity)
    assert_code_style(package_root=PACKAGE_ROOT, verbose=is_verbose, sys_exit=True)
|
|
@@ -0,0 +1,65 @@
|
|
|
1
|
+
import logging
|
|
2
|
+
|
|
3
|
+
from cli_base.cli_tools.dev_tools import run_unittest_cli
|
|
4
|
+
from cli_base.cli_tools.subprocess_utils import ToolsExecutor
|
|
5
|
+
from cli_base.cli_tools.verbosity import setup_logging
|
|
6
|
+
from cli_base.run_pip_audit import run_pip_audit
|
|
7
|
+
from cli_base.tyro_commands import TyroVerbosityArgType
|
|
8
|
+
from manageprojects.utilities.publish import publish_package
|
|
9
|
+
|
|
10
|
+
import PyHardLinkBackup
|
|
11
|
+
from PyHardLinkBackup.cli_dev import PACKAGE_ROOT, app
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
logger = logging.getLogger(__name__)
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
@app.command
def install():
    """
    Install requirements and 'PyHardLinkBackup' via pip as editable.
    """
    executor = ToolsExecutor(cwd=PACKAGE_ROOT)
    # First sync the locked dependencies, then install this package editable:
    for command in (
        ('uv', 'sync'),
        ('pip', 'install', '--no-deps', '-e', '.'),
    ):
        executor.verbose_check_call(*command)
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
@app.command
def pip_audit(verbosity: TyroVerbosityArgType):
    """
    Run pip-audit check against current requirements files
    """
    setup_logging(verbosity=verbosity)
    # Audit the requirements of the cloned repository root:
    run_pip_audit(base_path=PACKAGE_ROOT, verbosity=verbosity)
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
@app.command
def update(verbosity: TyroVerbosityArgType):
    """
    Update dependencies (uv.lock) and git pre-commit hooks
    """
    setup_logging(verbosity=verbosity)

    executor = ToolsExecutor(cwd=PACKAGE_ROOT)

    # Upgrade the tooling itself, then recreate the lock file with newest versions:
    executor.verbose_check_call('pip', 'install', '-U', 'pip')
    executor.verbose_check_call('pip', 'install', '-U', 'uv')
    executor.verbose_check_call('uv', 'lock', '--upgrade')

    run_pip_audit(base_path=PACKAGE_ROOT, verbosity=verbosity)

    # Install new dependencies in current .venv:
    executor.verbose_check_call('uv', 'sync')

    # Update git pre-commit hooks:
    executor.verbose_check_call('pre-commit', 'autoupdate')
|
|
56
|
+
|
|
57
|
+
|
|
58
|
+
@app.command
def publish():
    """
    Build and upload this project to PyPi
    """
    # Don't publish a broken state: run the test suite first.
    run_unittest_cli(verbose=False, exit_after_run=False)

    publish_package(module=PyHardLinkBackup, package_path=PACKAGE_ROOT)
|
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
import logging
|
|
2
|
+
|
|
3
|
+
from cli_base.cli_tools.shell_completion import setup_tyro_shell_completion
|
|
4
|
+
from cli_base.cli_tools.verbosity import setup_logging
|
|
5
|
+
from cli_base.tyro_commands import TyroVerbosityArgType
|
|
6
|
+
from rich import print # noqa
|
|
7
|
+
|
|
8
|
+
from PyHardLinkBackup.cli_dev import app
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
logger = logging.getLogger(__name__)
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
@app.command
def shell_completion(verbosity: TyroVerbosityArgType = 1, remove: bool = False) -> None:
    """
    Setup shell completion for this CLI (Currently only for bash shell)
    """
    setup_logging(verbosity=verbosity)
    # Install the completion snippet (or remove it again with --remove):
    setup_tyro_shell_completion(prog_name='PyHardLinkBackup_dev_cli', remove=remove)
|
|
@@ -0,0 +1,52 @@
|
|
|
1
|
+
from cli_base.cli_tools.dev_tools import run_coverage, run_nox, run_unittest_cli
|
|
2
|
+
from cli_base.cli_tools.subprocess_utils import verbose_check_call
|
|
3
|
+
from cli_base.cli_tools.test_utils.snapshot import UpdateTestSnapshotFiles
|
|
4
|
+
from cli_base.tyro_commands import TyroVerbosityArgType
|
|
5
|
+
|
|
6
|
+
from PyHardLinkBackup.cli_dev import PACKAGE_ROOT, app
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
@app.command
def mypy(verbosity: TyroVerbosityArgType):
    """Run Mypy (configured in pyproject.toml)"""
    verbose = verbosity > 0
    verbose_check_call('mypy', '.', cwd=PACKAGE_ROOT, verbose=verbose, exit_on_error=True)
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
@app.command
def update_test_snapshot_files(verbosity: TyroVerbosityArgType):
    """
    Update all test snapshot files (by remove and recreate all snapshot files)
    """
    with UpdateTestSnapshotFiles(root_path=PACKAGE_ROOT, verbose=verbosity > 0):
        # Just recreate them by running tests:
        run_unittest_cli(
            extra_env={
                'RAISE_SNAPSHOT_ERRORS': '0',  # Recreate snapshot files without error
            },
            verbose=verbosity > 1,
            exit_after_run=False,
        )
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
@app.command  # Dummy command
def test():
    """
    Run unittests
    """
    # Usually dispatched early in main(); registered here so it shows up in --help.
    run_unittest_cli()
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
@app.command  # Dummy command
def coverage():
    """
    Run tests and show coverage report.
    """
    # Usually dispatched early in main(); registered here so it shows up in --help.
    run_coverage()
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
@app.command  # Dummy "nox" command
def nox():
    """
    Run nox
    """
    # Usually dispatched early in main(); registered here so it shows up in --help.
    run_nox()
|
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
import logging
|
|
2
|
+
import sys
|
|
3
|
+
from pathlib import Path
|
|
4
|
+
|
|
5
|
+
from cli_base.cli_tools import git_history
|
|
6
|
+
from cli_base.cli_tools.verbosity import setup_logging
|
|
7
|
+
from cli_base.tyro_commands import TyroVerbosityArgType
|
|
8
|
+
from rich import print # noqa
|
|
9
|
+
|
|
10
|
+
from PyHardLinkBackup.cli_dev import app
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
logger = logging.getLogger(__name__)
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
@app.command
def update_readme_history(verbosity: TyroVerbosityArgType):
    """
    Update project history base on git commits/tags in README.md

    Will be exited with 1 if the README.md was updated otherwise with 0.

    Also, callable via e.g.:
        python -m cli_base update-readme-history -v
    """
    setup_logging(verbosity=verbosity)

    logger.debug('%s called. CWD: %s', __name__, Path.cwd())
    was_updated = git_history.update_readme_history(verbosity=verbosity)

    # Exit code 1 signals "README.md changed" (useful for pre-commit hooks):
    exit_code = int(bool(was_updated))
    if verbosity:
        print(f'{exit_code=}')
    sys.exit(exit_code)
|