PyHardLinkBackup 1.8.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- PyHardLinkBackup/__init__.py +7 -0
- PyHardLinkBackup/__main__.py +10 -0
- PyHardLinkBackup/backup.py +297 -0
- PyHardLinkBackup/cli_app/__init__.py +41 -0
- PyHardLinkBackup/cli_app/phlb.py +136 -0
- PyHardLinkBackup/cli_dev/__init__.py +70 -0
- PyHardLinkBackup/cli_dev/__main__.py +10 -0
- PyHardLinkBackup/cli_dev/benchmark.py +138 -0
- PyHardLinkBackup/cli_dev/code_style.py +12 -0
- PyHardLinkBackup/cli_dev/debugging.py +47 -0
- PyHardLinkBackup/cli_dev/packaging.py +62 -0
- PyHardLinkBackup/cli_dev/shell_completion.py +23 -0
- PyHardLinkBackup/cli_dev/testing.py +52 -0
- PyHardLinkBackup/cli_dev/update_readme_history.py +33 -0
- PyHardLinkBackup/compare_backup.py +259 -0
- PyHardLinkBackup/constants.py +18 -0
- PyHardLinkBackup/logging_setup.py +124 -0
- PyHardLinkBackup/rebuild_databases.py +217 -0
- PyHardLinkBackup/tests/__init__.py +36 -0
- PyHardLinkBackup/tests/test_backup.py +1167 -0
- PyHardLinkBackup/tests/test_compare_backup.py +167 -0
- PyHardLinkBackup/tests/test_doc_write.py +26 -0
- PyHardLinkBackup/tests/test_doctests.py +10 -0
- PyHardLinkBackup/tests/test_project_setup.py +46 -0
- PyHardLinkBackup/tests/test_readme.py +75 -0
- PyHardLinkBackup/tests/test_readme_history.py +9 -0
- PyHardLinkBackup/tests/test_rebuild_database.py +266 -0
- PyHardLinkBackup/utilities/__init__.py +0 -0
- PyHardLinkBackup/utilities/file_hash_database.py +62 -0
- PyHardLinkBackup/utilities/file_size_database.py +46 -0
- PyHardLinkBackup/utilities/filesystem.py +257 -0
- PyHardLinkBackup/utilities/humanize.py +39 -0
- PyHardLinkBackup/utilities/rich_utils.py +237 -0
- PyHardLinkBackup/utilities/sha256sums.py +61 -0
- PyHardLinkBackup/utilities/tee.py +40 -0
- PyHardLinkBackup/utilities/tests/__init__.py +0 -0
- PyHardLinkBackup/utilities/tests/test_file_hash_database.py +153 -0
- PyHardLinkBackup/utilities/tests/test_file_size_database.py +151 -0
- PyHardLinkBackup/utilities/tests/test_filesystem.py +167 -0
- PyHardLinkBackup/utilities/tests/unittest_utilities.py +78 -0
- PyHardLinkBackup/utilities/tyro_cli_shared_args.py +29 -0
- pyhardlinkbackup-1.8.1.dist-info/METADATA +700 -0
- pyhardlinkbackup-1.8.1.dist-info/RECORD +45 -0
- pyhardlinkbackup-1.8.1.dist-info/WHEEL +4 -0
- pyhardlinkbackup-1.8.1.dist-info/entry_points.txt +2 -0
|
@@ -0,0 +1,46 @@
|
|
|
1
|
+
from pathlib import Path
|
|
2
|
+
|
|
3
|
+
|
|
4
|
+
class FileSizeDatabase:
    """DocWrite: README.md ## FileSizeDatabase
    A simple "database" to track which file sizes have been seen.

    Uses a directory structure to avoid too many files in a single directory.
    We don't "cache" anything in Memory, to avoid high memory consumption for large datasets.
    """

    # Sizes below this are rejected by _get_size_path(): the two directory
    # levels are built from the first four digits, so a size needs at least
    # four digits (>= 1000) to yield a stable path layout.
    MIN_SIZE = 1000  # no padding is made, so the min size is 1000 bytes!

    def __init__(self, phlb_conf_dir: Path):
        # All marker files live below <phlb_conf_dir>/size-lookup/
        self.base_path = phlb_conf_dir / 'size-lookup'
        # parents=False: the configuration directory itself must already exist.
        self.base_path.mkdir(parents=False, exist_ok=True)

    def _get_size_path(self, size: int) -> Path:
        # Map a file size to its marker file path.
        # NOTE: `assert` is stripped under `python -O` — callers should not
        # rely on it for input validation.
        assert size >= self.MIN_SIZE, f'Size must be at least {self.MIN_SIZE} bytes'
        size_str = str(size)

        """DocWrite: README.md ## FileSizeDatabase
        Path structure:
        * `{base_dst}/.phlb/size-lookup/{XX}/{YY}/{size}`

        e.g.:

        * `1234567890` results in: `{base_dst}/.phlb/size-lookup/12/34/1234567890`
        """
        first_dir_name = size_str[:2]  # first two digits -> first directory level
        second_dir_name = size_str[2:4]  # next two digits -> second directory level
        size_path = self.base_path / first_dir_name / second_dir_name / size_str
        return size_path

    def __contains__(self, size: int) -> bool:
        # Membership test (`size in db`): True if the marker file exists.
        size_path = self._get_size_path(size)
        return size_path.exists()

    def add(self, size: int) -> None:
        # Record a size; no-op if it was added before.
        size_path = self._get_size_path(size)
        if not size_path.exists():
            size_path.parent.mkdir(parents=True, exist_ok=True)

            """DocWrite: README.md ## FileSizeDatabase
            All files are created empty, as we only care about their existence."""
            size_path.touch(exist_ok=False)
|
|
@@ -0,0 +1,257 @@
|
|
|
1
|
+
import hashlib
|
|
2
|
+
import logging
|
|
3
|
+
import os
|
|
4
|
+
import shutil
|
|
5
|
+
import time
|
|
6
|
+
from pathlib import Path
|
|
7
|
+
from typing import Iterable
|
|
8
|
+
|
|
9
|
+
from bx_py_utils.path import assert_is_dir
|
|
10
|
+
from rich.progress import (
|
|
11
|
+
Progress,
|
|
12
|
+
SpinnerColumn,
|
|
13
|
+
TextColumn,
|
|
14
|
+
TimeElapsedColumn,
|
|
15
|
+
)
|
|
16
|
+
|
|
17
|
+
from PyHardLinkBackup.constants import CHUNK_SIZE, HASH_ALGO
|
|
18
|
+
from PyHardLinkBackup.utilities.rich_utils import DisplayFileTreeProgress, HumanFileSizeColumn, LargeFileProgress
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
logger = logging.getLogger(__name__)
|
|
22
|
+
|
|
23
|
+
MIN_SIZE_FOR_PROGRESS_BAR = CHUNK_SIZE * 10
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
def verbose_path_stat(path: Path) -> os.stat_result:
    """Stat *path*, log every ``st_*`` attribute and return the stat result."""
    result = path.stat()
    # Collect all stat fields (st_size, st_mtime, ...) for one info log line.
    attributes = {name: getattr(result, name) for name in dir(result) if name.startswith('st_')}
    logger.info('Stat for %s: %s', path, attributes)
    return result
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
class RemoveFileOnError:
    """Context manager that deletes *file_path* if the ``with`` body raises.

    Used to avoid leaving half-written backup files behind. The exception
    itself is never suppressed (``__exit__`` returns False).
    """

    def __init__(self, file_path: Path):
        self.file_path = file_path

    def __enter__(self):
        pass

    def __exit__(self, exc_type, exc_value, exc_traceback):
        if exc_type:
            # Log with the full traceback, then remove the incomplete file.
            logger.info(
                f'Removing incomplete file {self.file_path} due to error: {exc_value}',
                exc_info=(exc_type, exc_value, exc_traceback),
            )
            self.file_path.unlink(missing_ok=True)
        return False
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
def hash_file(path: Path, progress: DisplayFileTreeProgress, total_size: int) -> str:
    """Compute the HASH_ALGO hex digest of *path*, reading in CHUNK_SIZE blocks.

    A LargeFileProgress sub-bar is shown while reading; it only becomes
    visible for sufficiently large/slow files.
    """
    logger.debug('Hash file %s using %s', path, HASH_ALGO)
    hasher = hashlib.new(HASH_ALGO)
    large_file_progress = LargeFileProgress(
        description=f'Hashing large file: "[yellow]{path}[/yellow]"',
        parent_progress=progress,
        total_size=total_size,
    )
    with large_file_progress as progress_bar, path.open('rb') as stream:
        # Sentinel iteration: read until an empty chunk marks EOF.
        for chunk in iter(lambda: stream.read(CHUNK_SIZE), b''):
            hasher.update(chunk)
            progress_bar.update(advance=len(chunk))
    file_hash = hasher.hexdigest()
    logger.info('%s %s hash: %s', path, HASH_ALGO, file_hash)
    return file_hash
|
|
69
|
+
|
|
70
|
+
|
|
71
|
+
def copy_with_progress(src: Path, dst: Path, progress: DisplayFileTreeProgress, total_size: int) -> None:
    """Copy *src* to *dst* in CHUNK_SIZE blocks, updating the progress display.

    After the data is copied, file metadata (permission bits, access /
    modification times and flags) is replicated via shutil.copystat().
    No hashing happens here — see copy_and_hash() for the combined variant.
    """
    # Bug fix: the old debug message ('... using %s', HASH_ALGO) claimed a
    # hash algorithm was involved, but this function only copies — that was
    # a copy-paste leftover from copy_and_hash().
    logger.debug('Copy file %s to %s', src, dst)
    with LargeFileProgress(
        description=f'Copying large file: "[yellow]{src}[/yellow]"',
        parent_progress=progress,
        total_size=total_size,
    ) as progress_bar:
        with src.open('rb') as source_file, dst.open('wb') as dst_file:
            while chunk := source_file.read(CHUNK_SIZE):
                dst_file.write(chunk)
                progress_bar.update(advance=len(chunk))

    # Keep original file metadata (permission bits, last access time, last modification time, and flags)
    shutil.copystat(src, dst)
|
|
85
|
+
|
|
86
|
+
|
|
87
|
+
def copy_and_hash(src: Path, dst: Path, progress: DisplayFileTreeProgress, total_size: int) -> str:
    """Copy *src* to *dst* while hashing the streamed bytes; return the hex digest.

    Metadata (permissions, timestamps, flags) is replicated via shutil.copystat()
    once the data has been written.
    """
    logger.debug('Copy and hash file %s to %s using %s', src, dst, HASH_ALGO)
    hasher = hashlib.new(HASH_ALGO)
    large_file_progress = LargeFileProgress(
        description=f'Copy and hash large file: "[yellow]{src}[/yellow]"',
        parent_progress=progress,
        total_size=total_size,
    )
    with large_file_progress as progress_bar, src.open('rb') as source_file, dst.open('wb') as dst_file:
        # Sentinel iteration: read until an empty chunk marks EOF.
        for chunk in iter(lambda: source_file.read(CHUNK_SIZE), b''):
            dst_file.write(chunk)
            hasher.update(chunk)
            progress_bar.update(advance=len(chunk))

    # Keep original file metadata (permission bits, last access time, last modification time, and flags)
    shutil.copystat(src, dst)

    file_hash = hasher.hexdigest()
    logger.info('%s backup to %s with %s hash: %s', src, dst, HASH_ALGO, file_hash)
    return file_hash
|
|
107
|
+
|
|
108
|
+
|
|
109
|
+
def read_and_hash_file(path: Path) -> tuple[bytes, str]:
    """Read *path* completely into memory; return ``(content, hex_digest)``.

    Only suitable for small files — large files are streamed elsewhere.
    """
    logger.debug('Read and hash file %s using %s into RAM', path, HASH_ALGO)
    content = path.read_bytes()
    # Hash in one shot by seeding the hasher with the full content.
    file_hash = hashlib.new(HASH_ALGO, content).hexdigest()
    logger.info('%s %s hash: %s', path, HASH_ALGO, file_hash)
    return content, file_hash
|
|
116
|
+
|
|
117
|
+
|
|
118
|
+
def iter_scandir_files(
    *,
    path: Path,
    one_file_system: bool,
    src_device_id: int,
    excludes: set[str],
) -> Iterable[os.DirEntry]:
    """
    Recursively yield all files+symlinks in the given directory.
    Note: Directory symlinks are treated as files (not recursed into).

    `excludes` contains directory *names* (not full paths) — any directory with
    a matching name is skipped at every depth.
    If `one_file_system` is True, directories whose device ID differs from
    `src_device_id` (i.e. other mount points) are not descended into.
    """
    logger.debug('Scanning directory %s', path)
    with os.scandir(path) as scandir_iterator:
        for entry in scandir_iterator:
            if entry.is_dir(
                follow_symlinks=False,  # Handle directory symlinks as files!
            ):
                if entry.name in excludes:
                    logger.debug('Excluding directory %s', entry.path)
                    continue

                if one_file_system:
                    try:
                        entry_device_id = entry.stat(follow_symlinks=False).st_dev
                    except OSError as err:
                        # e.g.: broken symlink
                        logger.debug('Skipping directory %s: %s', entry.path, err)
                        continue
                    if entry_device_id != src_device_id:
                        logger.debug(
                            'Skipping directory %s: different device ID %s (src device ID: %s)',
                            entry.path,
                            entry_device_id,
                            src_device_id,
                        )
                        continue

                # Depth-first recursion into the sub directory.
                yield from iter_scandir_files(
                    path=Path(entry.path),
                    one_file_system=one_file_system,
                    src_device_id=src_device_id,
                    excludes=excludes,
                )
            else:
                # It's a file or symlink or broken symlink
                yield entry
|
|
164
|
+
|
|
165
|
+
|
|
166
|
+
def humanized_fs_scan(
    *,
    path: Path,
    **iter_scandir_files_kwargs,
) -> tuple[int, int]:
    """Walk *path* with a live spinner display; return ``(file_count, total_size)``.

    Extra keyword arguments are passed through to iter_scandir_files().
    Note: broken symlinks increment `file_count` but contribute no size,
    because stat() raises FileNotFoundError after the counter was already
    incremented.
    """
    print(f'\nScanning filesystem at: {path}...')

    # Spinner + live counters; refreshed at most about once per second (below).
    progress = Progress(
        TimeElapsedColumn(),
        '{task.description}',
        SpinnerColumn('simpleDots'),
        TextColumn('[green]{task.fields[file_count]} Files'),
        '|',
        HumanFileSizeColumn(field_name='total_size'),
        '|',
        TextColumn('[cyan]{task.fields[files_per_sec]} Files/sec'),
    )

    file_count = 0
    total_size = 0
    start_time = time.time()
    scan_task_id = progress.add_task(
        description='Scanning',
        file_count=file_count,
        total_size=total_size,
        files_per_sec=0.0,
        total=None,  # unknown total -> indeterminate task
    )
    next_update = 0  # timestamp of the next allowed UI refresh
    with progress:
        for entry in iter_scandir_files(path=path, **iter_scandir_files_kwargs):
            if not entry.is_file():
                # Ignore e.g.: directory symlinks
                continue

            file_count += 1

            try:
                total_size += entry.stat().st_size
            except FileNotFoundError:
                # e.g.: broken symlink
                continue

            # Throttle UI updates to ~1/sec so rendering doesn't slow the scan.
            now = time.time()
            if now >= next_update:
                elapsed = max(now - start_time, 1e-6)  # guard against division by zero
                files_per_sec = int(file_count / elapsed)
                progress.update(
                    scan_task_id,
                    file_count=file_count,
                    total_size=total_size,
                    files_per_sec=files_per_sec,
                )
                next_update = now + 1

        now = time.time()

        # Final update with the complete numbers.
        elapsed = max(now - start_time, 1e-6)
        files_per_sec = int(file_count / elapsed)
        progress.stop_task(scan_task_id)
        progress.update(
            scan_task_id,
            description='Completed',
            # NOTE(review): rich's `completed` is the float "amount completed";
            # passing True sets it to 1 — possibly `refresh=True` was intended. Confirm.
            completed=True,
            file_count=file_count,
            total_size=total_size,
            files_per_sec=files_per_sec,
        )

    return file_count, total_size
|
|
236
|
+
|
|
237
|
+
|
|
238
|
+
def supports_hardlinks(directory: Path) -> bool:
    """Return True if the filesystem of *directory* supports hard links.

    Probes by creating a throw-away file plus a hard link to it inside
    *directory*; both are always removed afterwards. Typical failure cases
    are FAT/exFAT and some network filesystems.
    """
    logger.debug('Checking hardlink support in %s', directory)
    assert_is_dir(directory)
    test_src_file = directory / '.phlb_test'
    test_dst_file = directory / '.phlb_test_link'
    hardlinks_supported = False
    try:
        test_src_file.write_text('test')
        os.link(test_src_file, test_dst_file)
        # Read back through the link to confirm it really shares the data.
        assert test_dst_file.read_text() == 'test'
        hardlinks_supported = True
    except OSError as err:
        # e.g.: FAT/exFAT filesystems ;)
        logger.exception('Hardlink test failed in %s: %s', directory, err)
    finally:
        # Always clean up the probe files, even when the probe failed.
        test_src_file.unlink(missing_ok=True)
        test_dst_file.unlink(missing_ok=True)

    logger.info('Hardlink support in %s: %s', directory, hardlinks_supported)
    return hardlinks_supported
|
|
@@ -0,0 +1,39 @@
|
|
|
1
|
+
import time
|
|
2
|
+
|
|
3
|
+
from bx_py_utils.humanize.time import human_timedelta
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
def human_filesize(size: int | float) -> str:
|
|
7
|
+
"""
|
|
8
|
+
>>> human_filesize(1024)
|
|
9
|
+
'1.00 KiB'
|
|
10
|
+
>>> human_filesize(2.2*1024)
|
|
11
|
+
'2.20 KiB'
|
|
12
|
+
>>> human_filesize(3.33*1024*1024)
|
|
13
|
+
'3.33 MiB'
|
|
14
|
+
>>> human_filesize(4.44*1024*1024*1024)
|
|
15
|
+
'4.44 GiB'
|
|
16
|
+
>>> human_filesize(5.55*1024*1024*1024*1024)
|
|
17
|
+
'5.55 TiB'
|
|
18
|
+
>>> human_filesize(6.66*1024*1024*1024*1024*1024)
|
|
19
|
+
'6.66 PiB'
|
|
20
|
+
"""
|
|
21
|
+
for unit in ['Bytes', 'KiB', 'MiB', 'GiB', 'TiB']:
|
|
22
|
+
if size < 1024.0:
|
|
23
|
+
return f'{size:.2f} {unit}'
|
|
24
|
+
size /= 1024.0
|
|
25
|
+
return f'{size:.2f} PiB'
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
class PrintTimingContextManager:
    """Context manager that prints how long its ``with`` body took.

    Example::

        with PrintTimingContextManager('Backup'):
            do_backup()
    """

    def __init__(self, description: str):
        self.description = description

    def __enter__(self) -> None:
        # High-resolution monotonic clock, suited for duration measurement.
        self.start_time = time.perf_counter()

    def __exit__(self, exc_type, exc_value, traceback):
        elapsed = time.perf_counter() - self.start_time
        print(f'{self.description}: {human_timedelta(elapsed)}\n')
        return False  # never suppress exceptions
|
|
@@ -0,0 +1,237 @@
|
|
|
1
|
+
import time
|
|
2
|
+
|
|
3
|
+
from rich.live import Live
|
|
4
|
+
from rich.panel import Panel
|
|
5
|
+
from rich.progress import (
|
|
6
|
+
BarColumn,
|
|
7
|
+
Progress,
|
|
8
|
+
ProgressColumn,
|
|
9
|
+
Task,
|
|
10
|
+
TaskProgressColumn,
|
|
11
|
+
TextColumn,
|
|
12
|
+
TimeElapsedColumn,
|
|
13
|
+
TimeRemainingColumn,
|
|
14
|
+
TransferSpeedColumn,
|
|
15
|
+
)
|
|
16
|
+
from rich.style import Style
|
|
17
|
+
from rich.table import Table
|
|
18
|
+
from rich.text import Text
|
|
19
|
+
|
|
20
|
+
from PyHardLinkBackup.constants import LAGE_FILE_PROGRESS_MIN_SIZE
|
|
21
|
+
from PyHardLinkBackup.utilities.humanize import human_filesize
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
class HumanFileSizeColumn(ProgressColumn):
    """Progress column that renders byte counts via human_filesize().

    Without *field_name*: renders "<completed> / <remaining>".
    With *field_name*: renders the value of that custom task field.
    """

    def __init__(self, field_name: str | None = None, **kwargs) -> None:
        super().__init__(**kwargs)
        self.field_name = field_name

    def render(self, task):
        if self.field_name is None:
            advance_size = task.completed
            # NOTE(review): this is `task.remaining` (total - completed), not
            # `task.total`, so the right-hand number shrinks while running —
            # and it is None for tasks without a total. Confirm intended.
            remaining_size = task.remaining
            return (
                f'[white][progress.elapsed]{human_filesize(advance_size)}[white]'
                f' / [progress.remaining]{human_filesize(remaining_size)}'
            )
        else:
            try:
                advance_size = task.fields[self.field_name]
            except KeyError:
                # Re-raise with a clearer message; `from None` drops the noisy context.
                raise KeyError(f'Field {self.field_name=} not found in: {task.fields.keys()=}') from None
            return Text(human_filesize(advance_size))
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
class TransferSpeedColumn2(ProgressColumn):
    """Speed column with a configurable unit label (e.g. 'files' -> '12.3 files/s').

    Unlike rich's TransferSpeedColumn, the speed is not formatted as bytes.
    """

    def __init__(self, *args, unit: str = 'it', **kwargs) -> None:
        super().__init__(*args, **kwargs)
        self.unit = unit

    def render(self, task: Task) -> Text:
        # Prefer the frozen speed of a finished task over the live estimate.
        speed = task.finished_speed or task.speed
        if speed is None:
            # No samples yet.
            return Text('?', style='grey50')
        if speed < 0.1:
            # Effectively stalled — render a dash instead of a tiny number.
            return Text('-', style='grey50')
        return Text(f'{speed:.1f} {self.unit}/s', style='progress.data.speed')
|
|
57
|
+
|
|
58
|
+
|
|
59
|
+
class DisplayFileTreeProgress:
    """Live panel with three progress rows: Overall, Files and Size.

    Each row pairs a bar widget (percentage + bar) with a text widget
    (counts / speeds / time estimates). "Overall" is the mean of the
    file-count percentage and the file-size percentage.
    Use as a context manager; call update() to advance the display.
    """

    def __init__(self, *, description: str, total_file_count: int, total_size: int):
        percent_kwargs = dict(
            text_format='[progress.percentage]{task.percentage:>3.1f}%',
            justify='right',
        )
        # Bar widgets (left column of each row).
        self.overall_progress_bar = Progress(TaskProgressColumn(**percent_kwargs), BarColumn(bar_width=None))
        self.file_count_progress_bar = Progress(TaskProgressColumn(**percent_kwargs), BarColumn(bar_width=None))
        self.file_size_progress_bar = Progress(TaskProgressColumn(**percent_kwargs), BarColumn(bar_width=None))

        # Text widgets (right column of each row).
        self.overall_progress = Progress(
            TimeElapsedColumn(),
            '/',
            TimeRemainingColumn(),
        )
        self.file_count_progress = Progress(
            TextColumn('[white][progress.elapsed]{task.completed}[white] / [progress.remaining]{task.remaining}'),
            '|',
            TransferSpeedColumn2(unit='files'),
            '|',
            TimeRemainingColumn(),
        )
        self.file_size_progress = Progress(
            HumanFileSizeColumn(),
            '|',
            TransferSpeedColumn(),
            '|',
            TimeRemainingColumn(),
        )

        # One task per widget; "bar" and "time" tasks are updated in lock-step.
        self.overall_progress_task_bar = self.overall_progress_bar.add_task('', total=100)
        self.file_count_progress_task_bar = self.file_count_progress_bar.add_task('', total=total_file_count)
        self.file_size_progress_task_bar = self.file_size_progress_bar.add_task('', total=total_size)

        self.overall_progress_task_time = self.overall_progress.add_task('', total=100)
        self.file_count_progress_task_time = self.file_count_progress.add_task('', total=total_file_count)
        self.file_size_progress_task_time = self.file_size_progress.add_task('', total=total_size)

        progress_table = Table(box=None, expand=True, padding=(0, 2), show_header=False)
        progress_table.add_row('[b]Overall', self.overall_progress_bar, self.overall_progress)
        progress_table.add_row('Files', self.file_count_progress_bar, self.file_count_progress)
        progress_table.add_row('Size', self.file_size_progress_bar, self.file_size_progress)

        # Task objects kept so update() can read .percentage for "Overall".
        self.file_count_progress_task = self.file_count_progress.tasks[0]
        self.file_size_progress_task = self.file_size_progress.tasks[0]

        self.live = Live(
            Panel(
                progress_table,
                title=Text(description, style='progress.data.speed'),
                border_style=Style(color='white', bold=True),
            ),
            auto_refresh=False,  # refreshed manually at the end of update()
        )

    def __enter__(self):
        self.live.__enter__()
        return self

    def update(
        self,
        completed_file_count: int | None = None,
        advance_size: int | None = None,
        completed_size: int | None = None,
    ):
        """Advance the display; pass absolute `completed_*` values or a relative `advance_size`."""
        if completed_file_count is not None:
            self.file_count_progress_bar.update(self.file_count_progress_task_bar, completed=completed_file_count)
            self.file_count_progress.update(self.file_count_progress_task_time, completed=completed_file_count)

        # Absolute `completed_size` wins over relative `advance_size`.
        if completed_size is not None:
            self.file_size_progress_bar.update(self.file_size_progress_task_bar, completed=completed_size)
            self.file_size_progress.update(self.file_size_progress_task_time, completed=completed_size)
        elif advance_size is not None:
            self.file_size_progress_bar.update(self.file_size_progress_task_bar, advance=advance_size)
            self.file_size_progress.update(self.file_size_progress_task_time, advance=advance_size)

        # "Overall" is the mean of both percentages, on a 0..100 scale.
        overall_completed = (self.file_count_progress_task.percentage + self.file_size_progress_task.percentage) / 2

        self.overall_progress_bar.update(self.overall_progress_task_bar, completed=overall_completed)
        self.overall_progress.update(self.overall_progress_task_time, completed=overall_completed)

        self.live.refresh()

    def __exit__(self, exc_type, exc_value, traceback):
        # Stop the clocks first, then tear down the live display.
        self.overall_progress.stop()
        self.file_count_progress.stop()
        self.file_size_progress.stop()
        self.live.__exit__(exc_type, exc_value, traceback)
|
|
147
|
+
|
|
148
|
+
|
|
149
|
+
class NoopProgress(DisplayFileTreeProgress):
    """Drop-in replacement for DisplayFileTreeProgress that displays nothing.

    Used when progress output is disabled (e.g. non-interactive runs).
    """

    def __init__(self, *args, **kwargs):
        # Intentionally skip DisplayFileTreeProgress.__init__: no rich widgets needed.
        pass

    def __enter__(self):
        return self

    def update(self, *args, **kwargs):
        # Accept the same call signature as the real progress, render nothing.
        pass

    def __exit__(self, exc_type, exc_value, traceback):
        # Bug fix: the previous implementation returned `bool(exc_type)`,
        # i.e. True whenever an exception occurred — which *suppressed* that
        # exception. A no-op progress stub must never swallow errors.
        return False
|
|
161
|
+
|
|
162
|
+
|
|
163
|
+
class LargeFileProgress:
    """Deferred per-file progress bar for long running copy/hash operations.

    Context manager with an update(advance=...) API. The bar is created
    lazily: only after ~1 second, and only if less than 40% of the file has
    been processed by then — so fast operations never flash a bar on screen.
    Files at or below LAGE_FILE_PROGRESS_MIN_SIZE never get a bar.
    """

    def __init__(self, description: str, *, parent_progress: DisplayFileTreeProgress, total_size: int):
        self.description = description
        self.parent_progress = parent_progress
        self.total_size = total_size

        # Created lazily in update(); stays None for small/fast files.
        self.progress = None

    def __enter__(self):
        is_large_file = self.total_size > LAGE_FILE_PROGRESS_MIN_SIZE
        if is_large_file:
            self.start_time = time.monotonic()
            self.next_update = self.start_time + 1  # first UI check after 1 sec.
            self.advance = 0  # bytes accumulated since the last UI update
        else:
            # No progress indicator for small files
            self.next_update = None
        return self

    def update(self, advance: int):
        # Called once per processed chunk with the number of bytes handled.
        if not self.next_update:
            # Small file -> no progress indicator
            return

        self.advance += advance

        # Throttle: at most one UI update per second.
        now = time.monotonic()
        if now <= self.next_update:
            return
        self.next_update = now + 1

        if not self.progress:
            percent_done = self.advance / self.total_size
            if percent_done >= 0.4:
                # After 1 sec. is 40 % done, we are probably done soon!
                # Avoid showing progress bar for fast operations
                self.next_update = None  # No progress indicator
                return

            self.progress = Progress(
                TaskProgressColumn(text_format='[progress.percentage]{task.percentage:>3.1f}%'),
                BarColumn(bar_width=None, finished_style=Style(color='rgb(0,100,0)')),
                HumanFileSizeColumn(),
                '|',
                TransferSpeedColumn(),
                '|',
                TimeElapsedColumn(),
                TimeRemainingColumn(),
            )
            self.progress.log(f'Large file processing start: {self.description}')
            self.task_id = self.progress.add_task(
                description=self.description,
                total=self.total_size,
                # NOTE(review): rich's add_task() has no `start_time` parameter —
                # this becomes a custom task field; `start=` may have been intended. Confirm.
                start_time=self.start_time,
            )
            self.live = Live(
                Panel(self.progress, title=self.description, border_style=Style(color='yellow', bold=True)),
                auto_refresh=False,
            )
            self.live.__enter__()

        # Forward the accumulated bytes to the parent display and our own bar.
        self.parent_progress.update(advance_size=self.advance)
        self.progress.update(task_id=self.task_id, advance=self.advance, refresh=True)
        self.live.refresh()
        self.advance = 0  # reset the accumulator

    def __exit__(self, exc_type, exc_value, traceback):
        if self.progress:
            # Flush the last accumulated bytes, then freeze and dim the panel.
            self.progress.log(f'Large file processing finished: {self.description}')
            self.progress.update(task_id=self.task_id, advance=self.advance, refresh=True)
            self.progress.stop()
            self.live.renderable.border_style = 'grey50'  # dim the finished panel
            self.live.refresh()
            self.live.__exit__(exc_type, exc_value, traceback)
            print('\n')  # Add spacing after progress bar
|
|
@@ -0,0 +1,61 @@
|
|
|
1
|
+
import logging
|
|
2
|
+
from pathlib import Path
|
|
3
|
+
|
|
4
|
+
|
|
5
|
+
logger = logging.getLogger(__name__)
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
def get_sha256sums_path(file_path: Path):
    """
    Return the path of the ``SHA256SUMS`` file that lives next to *file_path*.

    >>> get_sha256sums_path(Path('foo/bar/baz.txt'))
    PosixPath('foo/bar/SHA256SUMS')
    """
    return file_path.parent / 'SHA256SUMS'
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
def store_hash(file_path: Path, file_hash: str):
    """DocWrite: README.md ## SHA256SUMS
    A `SHA256SUMS` file is stored in each backup directory containing the SHA256 hashes of all files in that directory.
    It's the same format as e.g.: `sha256sum * > SHA256SUMS` command produces.
    So it's possible to verify the integrity of the backup files later.
    e.g.:
    ```bash
    cd .../your/backup/foobar/20240101_120000/
    sha256sum -c SHA256SUMS
    ```
    """
    # Append one "<hash> <filename>" line to the sibling SHA256SUMS file.
    # NOTE(review): GNU `sha256sum` separates hash and name with TWO spaces —
    # verify that `sha256sum -c` accepts this single-space form.
    with get_sha256sums_path(file_path).open('a') as sums_file:
        sums_file.write(f'{file_hash} {file_path.name}\n')
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
def check_sha256sums(
    *,
    file_path: Path,
    file_hash: str,
) -> bool | None:
    """Verify *file_hash* against the SHA256SUMS file next to *file_path*.

    Returns True on a match, False on a mismatch, and None when there is no
    SHA256SUMS file at all or no entry for this filename.
    """
    hash_file_path = get_sha256sums_path(file_path=file_path)
    if not hash_file_path.is_file():
        return None  # Nothing to verify against

    with hash_file_path.open('r') as sums_file:
        for line in sums_file:
            try:
                expected_hash, filename = line.split(' ', maxsplit=1)
            except ValueError:
                # Malformed line (no separator) — log it and keep scanning.
                logger.exception(f'Invalid line in "{hash_file_path}": {line!r}')
                continue
            filename = filename.strip()
            if filename != file_path.name:
                continue
            # First entry for this filename decides the result.
            if expected_hash != file_hash:
                logger.error(
                    f'Hash {file_hash} from file {file_path} does not match hash in {hash_file_path} !'
                )
                return False
            logger.debug(f'{file_path} hash verified successfully from {hash_file_path}.')
            return True

    logger.info('No SHA256SUMS entry found for file: %s', file_path)
    return None
|
|
@@ -0,0 +1,40 @@
|
|
|
1
|
+
import re
|
|
2
|
+
import sys
|
|
3
|
+
from contextlib import redirect_stdout
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
# Borrowed from click:
_ansi_re = re.compile(r'\033\[[;?0-9]*[a-zA-Z]')


def strip_ansi_codes(value: str) -> str:
    """Return *value* with all ANSI escape sequences (colors, cursor moves) removed."""
    # Splitting on the escape pattern and rejoining is equivalent to sub('').
    return ''.join(_ansi_re.split(value))
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
class TeeStdout:
    """File-like object that writes to both the real stdout and *file*.

    The copy written to *file* has ANSI escape sequences stripped, so the
    log file stays readable. Only write()/flush() are implemented — enough
    for print() and console output via contextlib.redirect_stdout().
    """

    def __init__(self, file):
        self.file = file
        # Capture the current stdout now, before redirect_stdout() swaps it out.
        self.stdout = sys.stdout

    def write(self, data):
        self.stdout.write(data)
        # Plain-text copy without terminal escape codes.
        self.file.write(strip_ansi_codes(data))

    def flush(self):
        self.stdout.flush()
        self.file.flush()
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
class TeeStdoutContext:
    """Context manager: mirror all stdout output into *file_path*.

    Terminal output is unchanged; the file copy has ANSI escape codes
    stripped (see TeeStdout). The file is opened on __enter__ and always
    closed on __exit__.
    """

    def __init__(self, file_path):
        self.file_path = file_path

    def __enter__(self):
        self.file = open(self.file_path, 'w')
        try:
            self.redirect = redirect_stdout(TeeStdout(self.file))
            self.redirect.__enter__()
        except BaseException:
            # Robustness fix: don't leak the open file handle if setting up
            # the redirection fails.
            self.file.close()
            raise
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        try:
            self.redirect.__exit__(exc_type, exc_val, exc_tb)
        finally:
            # Robustness fix: close the log file even if restoring stdout raises.
            self.file.close()
|
|
File without changes
|