PyHardLinkBackup 1.8.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (45) hide show
  1. PyHardLinkBackup/__init__.py +7 -0
  2. PyHardLinkBackup/__main__.py +10 -0
  3. PyHardLinkBackup/backup.py +297 -0
  4. PyHardLinkBackup/cli_app/__init__.py +41 -0
  5. PyHardLinkBackup/cli_app/phlb.py +136 -0
  6. PyHardLinkBackup/cli_dev/__init__.py +70 -0
  7. PyHardLinkBackup/cli_dev/__main__.py +10 -0
  8. PyHardLinkBackup/cli_dev/benchmark.py +138 -0
  9. PyHardLinkBackup/cli_dev/code_style.py +12 -0
  10. PyHardLinkBackup/cli_dev/debugging.py +47 -0
  11. PyHardLinkBackup/cli_dev/packaging.py +62 -0
  12. PyHardLinkBackup/cli_dev/shell_completion.py +23 -0
  13. PyHardLinkBackup/cli_dev/testing.py +52 -0
  14. PyHardLinkBackup/cli_dev/update_readme_history.py +33 -0
  15. PyHardLinkBackup/compare_backup.py +259 -0
  16. PyHardLinkBackup/constants.py +18 -0
  17. PyHardLinkBackup/logging_setup.py +124 -0
  18. PyHardLinkBackup/rebuild_databases.py +217 -0
  19. PyHardLinkBackup/tests/__init__.py +36 -0
  20. PyHardLinkBackup/tests/test_backup.py +1167 -0
  21. PyHardLinkBackup/tests/test_compare_backup.py +167 -0
  22. PyHardLinkBackup/tests/test_doc_write.py +26 -0
  23. PyHardLinkBackup/tests/test_doctests.py +10 -0
  24. PyHardLinkBackup/tests/test_project_setup.py +46 -0
  25. PyHardLinkBackup/tests/test_readme.py +75 -0
  26. PyHardLinkBackup/tests/test_readme_history.py +9 -0
  27. PyHardLinkBackup/tests/test_rebuild_database.py +266 -0
  28. PyHardLinkBackup/utilities/__init__.py +0 -0
  29. PyHardLinkBackup/utilities/file_hash_database.py +62 -0
  30. PyHardLinkBackup/utilities/file_size_database.py +46 -0
  31. PyHardLinkBackup/utilities/filesystem.py +257 -0
  32. PyHardLinkBackup/utilities/humanize.py +39 -0
  33. PyHardLinkBackup/utilities/rich_utils.py +237 -0
  34. PyHardLinkBackup/utilities/sha256sums.py +61 -0
  35. PyHardLinkBackup/utilities/tee.py +40 -0
  36. PyHardLinkBackup/utilities/tests/__init__.py +0 -0
  37. PyHardLinkBackup/utilities/tests/test_file_hash_database.py +153 -0
  38. PyHardLinkBackup/utilities/tests/test_file_size_database.py +151 -0
  39. PyHardLinkBackup/utilities/tests/test_filesystem.py +167 -0
  40. PyHardLinkBackup/utilities/tests/unittest_utilities.py +78 -0
  41. PyHardLinkBackup/utilities/tyro_cli_shared_args.py +29 -0
  42. pyhardlinkbackup-1.8.1.dist-info/METADATA +700 -0
  43. pyhardlinkbackup-1.8.1.dist-info/RECORD +45 -0
  44. pyhardlinkbackup-1.8.1.dist-info/WHEEL +4 -0
  45. pyhardlinkbackup-1.8.1.dist-info/entry_points.txt +2 -0
@@ -0,0 +1,46 @@
1
+ from pathlib import Path
2
+
3
+
4
class FileSizeDatabase:
    """DocWrite: README.md ## FileSizeDatabase
    A simple "database" to track which file sizes have been seen.

    Uses a directory structure to avoid too many files in a single directory.
    We don't "cache" anything in Memory, to avoid high memory consumption for large datasets.
    """

    # Smaller sizes can't be mapped onto the XX/YY directory layout (needs >= 4 digits),
    # because no zero-padding is applied to the size string.
    MIN_SIZE = 1000  # no padding is made, so the min size is 1000 bytes!

    def __init__(self, phlb_conf_dir: Path):
        # Root of the lookup tree. parents=False: the .phlb config dir itself must already exist.
        self.base_path = phlb_conf_dir / 'size-lookup'
        self.base_path.mkdir(parents=False, exist_ok=True)

    def _get_size_path(self, size: int) -> Path:
        # Map a file size to its marker-file path.
        # NOTE(review): `assert` is stripped under `python -O`, so this validation
        # only runs in non-optimized interpreters.
        assert size >= self.MIN_SIZE, f'Size must be at least {self.MIN_SIZE} bytes'
        size_str = str(size)

        """DocWrite: README.md ## FileSizeDatabase
        Path structure:
        * `{base_dst}/.phlb/size-lookup/{XX}/{YY}/{size}`

        e.g.:

        * `1234567890` results in: `{base_dst}/.phlb/size-lookup/12/34/1234567890`
        """
        first_dir_name = size_str[:2]  # first two digits -> first directory level
        second_dir_name = size_str[2:4]  # next two digits -> second directory level
        size_path = self.base_path / first_dir_name / second_dir_name / size_str
        return size_path

    def __contains__(self, size: int) -> bool:
        # `size in db` -> True if a marker file for this size exists.
        size_path = self._get_size_path(size)
        return size_path.exists()

    def add(self, size: int):
        # Record a size by creating an empty marker file; no-op if already recorded.
        size_path = self._get_size_path(size)
        if not size_path.exists():
            size_path.parent.mkdir(parents=True, exist_ok=True)

            """DocWrite: README.md ## FileSizeDatabase
            All files are created empty, as we only care about their existence."""
            size_path.touch(exist_ok=False)
@@ -0,0 +1,257 @@
1
+ import hashlib
2
+ import logging
3
+ import os
4
+ import shutil
5
+ import time
6
+ from pathlib import Path
7
+ from typing import Iterable
8
+
9
+ from bx_py_utils.path import assert_is_dir
10
+ from rich.progress import (
11
+ Progress,
12
+ SpinnerColumn,
13
+ TextColumn,
14
+ TimeElapsedColumn,
15
+ )
16
+
17
+ from PyHardLinkBackup.constants import CHUNK_SIZE, HASH_ALGO
18
+ from PyHardLinkBackup.utilities.rich_utils import DisplayFileTreeProgress, HumanFileSizeColumn, LargeFileProgress
19
+
20
+
21
logger = logging.getLogger(__name__)  # module level logger

# NOTE(review): not referenced within this module — presumably the minimum file
# size for showing a dedicated progress bar; confirm at the call sites.
MIN_SIZE_FOR_PROGRESS_BAR = CHUNK_SIZE * 10
24
+
25
+
26
def verbose_path_stat(path: Path) -> os.stat_result:
    """Stat *path*, log every `st_*` attribute of the result, and return it."""
    result = path.stat()
    attrs = {name: getattr(result, name) for name in dir(result) if name.startswith('st_')}
    logger.info('Stat for %s: %s', path, attrs)
    return result
35
+
36
+
37
class RemoveFileOnError:
    """
    Context manager: if the `with` body raises, delete `file_path`
    (an incomplete/partial file). The exception is never suppressed.
    """

    def __init__(self, file_path: Path):
        self.file_path = file_path

    def __enter__(self):
        return None

    def __exit__(self, exc_type, exc_value, exc_traceback):
        if exc_type is not None:
            logger.info(
                f'Removing incomplete file {self.file_path} due to error: {exc_value}',
                exc_info=(exc_type, exc_value, exc_traceback),
            )
            self.file_path.unlink(missing_ok=True)
        return False  # propagate the exception
52
+
53
+
54
def hash_file(path: Path, progress: DisplayFileTreeProgress, total_size: int) -> str:
    """
    Hash the file content chunk-wise with HASH_ALGO and return the hex digest.

    A LargeFileProgress bar is shown for big files; every chunk advance is
    also forwarded to the parent `progress` display.
    """
    logger.debug('Hash file %s using %s', path, HASH_ALGO)
    hasher = hashlib.new(HASH_ALGO)
    with LargeFileProgress(
        description=f'Hashing large file: "[yellow]{path}[/yellow]"',
        parent_progress=progress,
        total_size=total_size,
    ) as progress_bar:
        with path.open('rb') as f:
            # Read in fixed-size chunks to keep memory usage flat for huge files.
            while chunk := f.read(CHUNK_SIZE):
                hasher.update(chunk)
                progress_bar.update(advance=len(chunk))
    file_hash = hasher.hexdigest()
    logger.info('%s %s hash: %s', path, HASH_ALGO, file_hash)
    return file_hash
69
+
70
+
71
def copy_with_progress(src: Path, dst: Path, progress: DisplayFileTreeProgress, total_size: int) -> None:
    """
    Copy `src` to `dst` chunk-wise while updating progress displays.

    A LargeFileProgress bar is shown for big files; every chunk advance is
    also forwarded to the parent `progress`. File metadata (permission bits,
    timestamps, flags) is preserved via shutil.copystat().
    """
    # Fixed log message: this function only copies — no hashing is involved,
    # so mentioning HASH_ALGO here was misleading.
    logger.debug('Copy file %s to %s', src, dst)
    with LargeFileProgress(
        description=f'Copying large file: "[yellow]{src}[/yellow]"',
        parent_progress=progress,
        total_size=total_size,
    ) as progress_bar:
        with src.open('rb') as source_file, dst.open('wb') as dst_file:
            while chunk := source_file.read(CHUNK_SIZE):
                dst_file.write(chunk)
                progress_bar.update(advance=len(chunk))

    # Keep original file metadata (permission bits, last access time, last modification time, and flags)
    shutil.copystat(src, dst)
85
+
86
+
87
def copy_and_hash(src: Path, dst: Path, progress: DisplayFileTreeProgress, total_size: int) -> str:
    """
    Copy `src` to `dst` while hashing the content in the same pass; return
    the HASH_ALGO hex digest of the copied data.

    Shows a LargeFileProgress bar for big files and forwards chunk advances
    to the parent `progress`. File metadata is preserved via copystat.
    """
    logger.debug('Copy and hash file %s to %s using %s', src, dst, HASH_ALGO)
    hasher = hashlib.new(HASH_ALGO)
    with LargeFileProgress(
        description=f'Copy and hash large file: "[yellow]{src}[/yellow]"',
        parent_progress=progress,
        total_size=total_size,
    ) as progress_bar:
        with src.open('rb') as source_file, dst.open('wb') as dst_file:
            # Single pass: write + hash each chunk, so the source is read only once.
            while chunk := source_file.read(CHUNK_SIZE):
                dst_file.write(chunk)
                hasher.update(chunk)
                progress_bar.update(advance=len(chunk))

    # Keep original file metadata (permission bits, last access time, last modification time, and flags)
    shutil.copystat(src, dst)

    file_hash = hasher.hexdigest()
    logger.info('%s backup to %s with %s hash: %s', src, dst, HASH_ALGO, file_hash)
    return file_hash
107
+
108
+
109
def read_and_hash_file(path: Path) -> tuple[bytes, str]:
    """Load the complete file into memory; return (content, HASH_ALGO hex digest)."""
    logger.debug('Read and hash file %s using %s into RAM', path, HASH_ALGO)
    content = path.read_bytes()
    file_hash = hashlib.new(HASH_ALGO, content).hexdigest()
    logger.info('%s %s hash: %s', path, HASH_ALGO, file_hash)
    return content, file_hash
116
+
117
+
118
def iter_scandir_files(
    *,
    path: Path,
    one_file_system: bool,
    src_device_id,
    excludes: set[str],
) -> Iterable[os.DirEntry]:
    """
    Recursively yield all files+symlinks in the given directory.
    Note: Directory symlinks are treated as files (not recursed into).

    :param one_file_system: if True, don't descend into directories whose
        device ID differs from `src_device_id` (i.e. other mount points).
    :param src_device_id: st_dev of the scan root, compared against each directory.
    :param excludes: directory *names* (not full paths) to skip entirely.
    """
    logger.debug('Scanning directory %s', path)
    with os.scandir(path) as scandir_iterator:
        for entry in scandir_iterator:
            if entry.is_dir(
                follow_symlinks=False,  # Handle directory symlinks as files!
            ):
                if entry.name in excludes:
                    logger.debug('Excluding directory %s', entry.path)
                    continue

                if one_file_system:
                    try:
                        entry_device_id = entry.stat(follow_symlinks=False).st_dev
                    except OSError as err:
                        # e.g.: broken symlink
                        logger.debug('Skipping directory %s: %s', entry.path, err)
                        continue
                    if entry_device_id != src_device_id:
                        logger.debug(
                            'Skipping directory %s: different device ID %s (src device ID: %s)',
                            entry.path,
                            entry_device_id,
                            src_device_id,
                        )
                        continue

                # Recurse into the (non-excluded, same-device) subdirectory:
                yield from iter_scandir_files(
                    path=Path(entry.path),
                    one_file_system=one_file_system,
                    src_device_id=src_device_id,
                    excludes=excludes,
                )
            else:
                # It's a file or symlink or broken symlink
                yield entry
164
+
165
+
166
def humanized_fs_scan(
    *,
    path: Path,
    **iter_scandir_files_kwargs,
) -> tuple[int, int]:
    """
    Walk `path` via iter_scandir_files() and return (file_count, total_size).

    While scanning, a rich progress line shows elapsed time, file count,
    accumulated size and files/sec, throttled to ~1 refresh per second.
    Extra keyword arguments are passed through to iter_scandir_files().
    """
    print(f'\nScanning filesystem at: {path}...')

    progress = Progress(
        TimeElapsedColumn(),
        '{task.description}',
        SpinnerColumn('simpleDots'),
        TextColumn('[green]{task.fields[file_count]} Files'),
        '|',
        HumanFileSizeColumn(field_name='total_size'),
        '|',
        TextColumn('[cyan]{task.fields[files_per_sec]} Files/sec'),
    )

    file_count = 0
    total_size = 0
    start_time = time.time()
    scan_task_id = progress.add_task(
        description='Scanning',
        file_count=file_count,
        total_size=total_size,
        files_per_sec=0.0,
        total=None,  # indeterminate: the total number of files is unknown up front
    )
    next_update = 0
    with progress:
        for entry in iter_scandir_files(path=path, **iter_scandir_files_kwargs):
            if not entry.is_file():
                # Ignore e.g.: directory symlinks
                continue

            file_count += 1

            try:
                total_size += entry.stat().st_size
            except FileNotFoundError:
                # e.g.: broken symlink
                # NOTE(review): the entry was already counted above, so a file
                # vanishing mid-scan contributes to file_count but not total_size.
                continue

            now = time.time()
            if now >= next_update:
                # Throttle display updates to roughly once per second.
                elapsed = max(now - start_time, 1e-6)  # guard against division by zero
                files_per_sec = int(file_count / elapsed)
                progress.update(
                    scan_task_id,
                    file_count=file_count,
                    total_size=total_size,
                    files_per_sec=files_per_sec,
                )
                next_update = now + 1

        now = time.time()

        # Final update with the complete numbers before the live display closes:
        elapsed = max(now - start_time, 1e-6)
        files_per_sec = int(file_count / elapsed)
        progress.stop_task(scan_task_id)
        progress.update(
            scan_task_id,
            description='Completed',
            completed=True,
            file_count=file_count,
            total_size=total_size,
            files_per_sec=files_per_sec,
        )

    return file_count, total_size
236
+
237
+
238
def supports_hardlinks(directory: Path) -> bool:
    """
    Probe whether the filesystem of `directory` supports hard links by
    creating a tiny test file and hard-linking it. Both test files are
    always removed again, even on failure.
    """
    logger.debug('Checking hardlink support in %s', directory)
    assert_is_dir(directory)
    src = directory / '.phlb_test'
    dst = directory / '.phlb_test_link'
    supported = False
    try:
        src.write_text('test')
        os.link(src, dst)
        assert dst.read_text() == 'test'
        supported = True
    except OSError as err:
        # e.g.: FAT/exFAT filesystems ;)
        logger.exception('Hardlink test failed in %s: %s', directory, err)
    finally:
        # Clean up the probe files regardless of the outcome:
        src.unlink(missing_ok=True)
        dst.unlink(missing_ok=True)

    logger.info('Hardlink support in %s: %s', directory, supported)
    return supported
@@ -0,0 +1,39 @@
1
+ import time
2
+
3
+ from bx_py_utils.humanize.time import human_timedelta
4
+
5
+
6
+ def human_filesize(size: int | float) -> str:
7
+ """
8
+ >>> human_filesize(1024)
9
+ '1.00 KiB'
10
+ >>> human_filesize(2.2*1024)
11
+ '2.20 KiB'
12
+ >>> human_filesize(3.33*1024*1024)
13
+ '3.33 MiB'
14
+ >>> human_filesize(4.44*1024*1024*1024)
15
+ '4.44 GiB'
16
+ >>> human_filesize(5.55*1024*1024*1024*1024)
17
+ '5.55 TiB'
18
+ >>> human_filesize(6.66*1024*1024*1024*1024*1024)
19
+ '6.66 PiB'
20
+ """
21
+ for unit in ['Bytes', 'KiB', 'MiB', 'GiB', 'TiB']:
22
+ if size < 1024.0:
23
+ return f'{size:.2f} {unit}'
24
+ size /= 1024.0
25
+ return f'{size:.2f} PiB'
26
+
27
+
28
class PrintTimingContextManager:
    """Context manager that prints how long the `with` block took (humanized)."""

    def __init__(self, description: str):
        self.description = description

    def __enter__(self) -> None:
        # Monotonic high-resolution timer: immune to wall-clock adjustments.
        self.start_time = time.perf_counter()

    def __exit__(self, exc_type, exc_value, traceback):
        duration = time.perf_counter() - self.start_time
        print(f'{self.description}: {human_timedelta(duration)}\n')
        if exc_type:
            return False  # Do not suppress exceptions
@@ -0,0 +1,237 @@
1
+ import time
2
+
3
+ from rich.live import Live
4
+ from rich.panel import Panel
5
+ from rich.progress import (
6
+ BarColumn,
7
+ Progress,
8
+ ProgressColumn,
9
+ Task,
10
+ TaskProgressColumn,
11
+ TextColumn,
12
+ TimeElapsedColumn,
13
+ TimeRemainingColumn,
14
+ TransferSpeedColumn,
15
+ )
16
+ from rich.style import Style
17
+ from rich.table import Table
18
+ from rich.text import Text
19
+
20
+ from PyHardLinkBackup.constants import LAGE_FILE_PROGRESS_MIN_SIZE
21
+ from PyHardLinkBackup.utilities.humanize import human_filesize
22
+
23
+
24
class HumanFileSizeColumn(ProgressColumn):
    """
    Rich progress column that renders byte counts via human_filesize().

    Two modes:
      * field_name=None: render the task's own completed/remaining values.
      * field_name set: render that custom task field's value.
    """

    def __init__(self, field_name: str | None = None, **kwargs) -> None:
        super().__init__(**kwargs)
        self.field_name = field_name

    def render(self, task):
        if self.field_name is None:
            advance_size = task.completed
            remaining_size = task.remaining
            # NOTE(review): task.remaining is total minus completed, so this
            # renders "done / left", not "done / total" — confirm intended.
            return (
                f'[white][progress.elapsed]{human_filesize(advance_size)}[white]'
                f' / [progress.remaining]{human_filesize(remaining_size)}'
            )
        else:
            try:
                advance_size = task.fields[self.field_name]
            except KeyError:
                # Fail loudly with a helpful message if the task lacks the field:
                raise KeyError(f'Field {self.field_name=} not found in: {task.fields.keys()=}') from None
            return Text(human_filesize(advance_size))
43
+
44
+
45
class TransferSpeedColumn2(ProgressColumn):
    """
    Like rich's TransferSpeedColumn, but with a configurable unit
    (e.g. 'files' instead of bytes) and grey placeholders while the
    speed is unknown or effectively zero.
    """

    def __init__(self, *args, unit: str = 'it', **kwargs) -> None:
        super().__init__(*args, **kwargs)
        self.unit = unit

    def render(self, task: Task) -> Text:
        # Prefer the frozen speed of a finished task over the live estimate.
        speed = task.finished_speed or task.speed
        if speed is None:
            return Text('?', style='grey50')  # not measurable yet
        if speed < 0.1:
            return Text('-', style='grey50')  # effectively stalled
        return Text(f'{speed:.1f} {self.unit}/s', style='progress.data.speed')
57
+
58
+
59
class DisplayFileTreeProgress:
    """
    Live three-row progress display ("Overall" / "Files" / "Size") inside a
    rich Panel. Each row pairs two Progress instances: a bar (left) and the
    textual statistics (right). "Overall" is the arithmetic mean of the
    file-count and file-size percentages.

    Use as a context manager and call update() as files are processed.
    """

    def __init__(self, *, description: str, total_file_count: int, total_size: int):
        percent_kwargs = dict(
            text_format='[progress.percentage]{task.percentage:>3.1f}%',
            justify='right',
        )
        # Left column of each row: percentage + bar only.
        self.overall_progress_bar = Progress(TaskProgressColumn(**percent_kwargs), BarColumn(bar_width=None))
        self.file_count_progress_bar = Progress(TaskProgressColumn(**percent_kwargs), BarColumn(bar_width=None))
        self.file_size_progress_bar = Progress(TaskProgressColumn(**percent_kwargs), BarColumn(bar_width=None))

        # Right column of each row: textual statistics.
        self.overall_progress = Progress(
            TimeElapsedColumn(),
            '/',
            TimeRemainingColumn(),
        )
        self.file_count_progress = Progress(
            TextColumn('[white][progress.elapsed]{task.completed}[white] / [progress.remaining]{task.remaining}'),
            '|',
            TransferSpeedColumn2(unit='files'),
            '|',
            TimeRemainingColumn(),
        )
        self.file_size_progress = Progress(
            HumanFileSizeColumn(),
            '|',
            TransferSpeedColumn(),
            '|',
            TimeRemainingColumn(),
        )

        # One task per Progress instance; "overall" is tracked on a 0-100 scale.
        self.overall_progress_task_bar = self.overall_progress_bar.add_task('', total=100)
        self.file_count_progress_task_bar = self.file_count_progress_bar.add_task('', total=total_file_count)
        self.file_size_progress_task_bar = self.file_size_progress_bar.add_task('', total=total_size)

        self.overall_progress_task_time = self.overall_progress.add_task('', total=100)
        self.file_count_progress_task_time = self.file_count_progress.add_task('', total=total_file_count)
        self.file_size_progress_task_time = self.file_size_progress.add_task('', total=total_size)

        progress_table = Table(box=None, expand=True, padding=(0, 2), show_header=False)
        progress_table.add_row('[b]Overall', self.overall_progress_bar, self.overall_progress)
        progress_table.add_row('Files', self.file_count_progress_bar, self.file_count_progress)
        progress_table.add_row('Size', self.file_size_progress_bar, self.file_size_progress)

        # Task objects kept so update() can read their .percentage directly:
        self.file_count_progress_task = self.file_count_progress.tasks[0]
        self.file_size_progress_task = self.file_size_progress.tasks[0]

        # auto_refresh=False: we refresh manually at the end of update().
        self.live = Live(
            Panel(
                progress_table,
                title=Text(description, style='progress.data.speed'),
                border_style=Style(color='white', bold=True),
            ),
            auto_refresh=False,
        )

    def __enter__(self):
        self.live.__enter__()
        return self

    def update(
        self,
        completed_file_count: int | None = None,
        advance_size: int | None = None,
        completed_size: int | None = None,
    ):
        """
        Update the display. `completed_*` set absolute values; `advance_size`
        adds to the current size and is ignored when `completed_size` is given.
        """
        if completed_file_count is not None:
            self.file_count_progress_bar.update(self.file_count_progress_task_bar, completed=completed_file_count)
            self.file_count_progress.update(self.file_count_progress_task_time, completed=completed_file_count)

        if completed_size is not None:
            self.file_size_progress_bar.update(self.file_size_progress_task_bar, completed=completed_size)
            self.file_size_progress.update(self.file_size_progress_task_time, completed=completed_size)
        elif advance_size is not None:
            self.file_size_progress_bar.update(self.file_size_progress_task_bar, advance=advance_size)
            self.file_size_progress.update(self.file_size_progress_task_time, advance=advance_size)

        # Overall percentage = mean of the file-count and size percentages.
        overall_completed = (self.file_count_progress_task.percentage + self.file_size_progress_task.percentage) / 2

        self.overall_progress_bar.update(self.overall_progress_task_bar, completed=overall_completed)
        self.overall_progress.update(self.overall_progress_task_time, completed=overall_completed)

        self.live.refresh()

    def __exit__(self, exc_type, exc_value, traceback):
        # Stop the stat Progress instances before closing the live display.
        self.overall_progress.stop()
        self.file_count_progress.stop()
        self.file_size_progress.stop()
        self.live.__exit__(exc_type, exc_value, traceback)
147
+
148
+
149
class NoopProgress(DisplayFileTreeProgress):
    """
    Drop-in replacement for DisplayFileTreeProgress that displays nothing.
    Useful when no progress output is wanted (e.g. non-interactive runs).
    """

    def __init__(self, *args, **kwargs):
        pass  # Intentionally skip all Progress/Live setup of the base class.

    def __enter__(self):
        return self

    def update(self, *args, **kwargs):
        pass

    def __exit__(self, exc_type, exc_value, traceback):
        # Bug fix: the previous `return bool(exc_type)` returned True whenever
        # an exception occurred, which silently SUPPRESSED every exception
        # raised inside the `with` block. A no-op progress must never do that.
        return False
161
+
162
+
163
class LargeFileProgress:
    """
    Lazily-shown progress panel for processing one (potentially large) file.

    Behaviour:
      * total_size <= LAGE_FILE_PROGRESS_MIN_SIZE: no display at all.
      * larger files: after ~1s, if less than 40% is already done, a dedicated
        Panel with its own Progress bar is opened; refreshes are throttled to
        about once per second and advances are also forwarded to
        `parent_progress`.
    """

    def __init__(self, description: str, *, parent_progress: DisplayFileTreeProgress, total_size: int):
        self.description = description
        self.parent_progress = parent_progress
        self.total_size = total_size

        self.progress = None  # created lazily in update(), only if really needed

    def __enter__(self):
        is_large_file = self.total_size > LAGE_FILE_PROGRESS_MIN_SIZE
        if is_large_file:
            self.start_time = time.monotonic()
            self.next_update = self.start_time + 1  # first display decision after ~1s
            self.advance = 0  # bytes accumulated since the last refresh
        else:
            # No progress indicator for small files
            self.next_update = None
        return self

    def update(self, advance: int):
        if not self.next_update:
            # Small file -> no progress indicator
            return

        self.advance += advance

        now = time.monotonic()
        if now <= self.next_update:
            return  # throttled: refresh at most once per second
        self.next_update = now + 1

        if not self.progress:
            percent_done = self.advance / self.total_size
            if percent_done >= 0.4:
                # After 1 sec. is 40 % done, we are probably done soon!
                # Avoid showing progress bar for fast operations
                self.next_update = None  # No progress indicator
                return

            self.progress = Progress(
                TaskProgressColumn(text_format='[progress.percentage]{task.percentage:>3.1f}%'),
                BarColumn(bar_width=None, finished_style=Style(color='rgb(0,100,0)')),
                HumanFileSizeColumn(),
                '|',
                TransferSpeedColumn(),
                '|',
                TimeElapsedColumn(),
                TimeRemainingColumn(),
            )
            self.progress.log(f'Large file processing start: {self.description}')
            # NOTE(review): `start_time` is not a documented Progress.add_task()
            # parameter, so rich will store it as a custom task field — confirm
            # it actually influences the elapsed-time column.
            self.task_id = self.progress.add_task(
                description=self.description,
                total=self.total_size,
                start_time=self.start_time,
            )
            self.live = Live(
                Panel(self.progress, title=self.description, border_style=Style(color='yellow', bold=True)),
                auto_refresh=False,
            )
            self.live.__enter__()

        # Forward the accumulated bytes to the parent display, then reset:
        self.parent_progress.update(advance_size=self.advance)
        self.progress.update(task_id=self.task_id, advance=self.advance, refresh=True)
        self.live.refresh()
        self.advance = 0

    def __exit__(self, exc_type, exc_value, traceback):
        if self.progress:
            self.progress.log(f'Large file processing finished: {self.description}')
            # Flush the not-yet-displayed remainder and finalize the panel:
            self.progress.update(task_id=self.task_id, advance=self.advance, refresh=True)
            self.progress.stop()
            self.live.renderable.border_style = 'grey50'
            self.live.refresh()
            self.live.__exit__(exc_type, exc_value, traceback)
            print('\n')  # Add spacing after progress bar
@@ -0,0 +1,61 @@
1
+ import logging
2
+ from pathlib import Path
3
+
4
+
5
logger = logging.getLogger(__name__)  # module level logger
6
+
7
+
8
def get_sha256sums_path(file_path: Path):
    """
    Return the path of the `SHA256SUMS` file belonging to *file_path*
    (always located in the same directory as the file).

    >>> get_sha256sums_path(Path('foo/bar/baz.txt'))
    PosixPath('foo/bar/SHA256SUMS')
    """
    return file_path.parent / 'SHA256SUMS'
15
+
16
+
17
def store_hash(file_path: Path, file_hash: str):
    """DocWrite: README.md ## SHA256SUMS
    A `SHA256SUMS` file is stored in each backup directory containing the SHA256 hashes of all files in that directory.
    It's the same format as e.g.: `sha256sum * > SHA256SUMS` command produces.
    So it's possible to verify the integrity of the backup files later.
    e.g.:
    ```bash
    cd .../your/backup/foobar/20240101_120000/
    sha256sum -c SHA256SUMS
    ```
    """
    hash_file_path = get_sha256sums_path(file_path)
    # Append one entry per file. Note the TWO spaces between hash and name:
    # GNU `sha256sum -c` rejects lines with only a single separator character
    # ("no properly formatted checksum lines found"). check_sha256sums() still
    # parses this fine, as it splits on the first space and strips the rest.
    with hash_file_path.open('a') as f:
        f.write(f'{file_hash}  {file_path.name}\n')
31
+
32
+
33
def check_sha256sums(
    *,
    file_path: Path,
    file_hash: str,
) -> bool | None:
    """
    Verify `file_hash` against the SHA256SUMS file next to `file_path`.

    Returns:
        True  - a matching entry exists and the hash is correct.
        False - an entry exists but the hash differs (logged as error).
        None  - no SHA256SUMS file or no entry for this file name.
    """
    hash_file_path = get_sha256sums_path(file_path=file_path)
    if not hash_file_path.is_file():
        return None  # Nothing to verify against

    with hash_file_path.open('r') as f:
        for line in f:
            try:
                # Line format: "<hash> <filename>" -> split only on the first space.
                expected_hash, filename = line.split(' ', maxsplit=1)
            except ValueError:
                # e.g.: a line without any space at all
                logger.exception(f'Invalid line in "{hash_file_path}": {line!r}')
            else:
                # strip() tolerates extra separator spaces and the trailing newline:
                filename = filename.strip()
                if filename == file_path.name:
                    if not expected_hash == file_hash:
                        logger.error(
                            f'Hash {file_hash} from file {file_path} does not match hash in {hash_file_path} !'
                        )
                        return False
                    else:
                        logger.debug(f'{file_path} hash verified successfully from {hash_file_path}.')
                        return True

    logger.info('No SHA256SUMS entry found for file: %s', file_path)
    return None
@@ -0,0 +1,40 @@
1
+ import re
2
+ import sys
3
+ from contextlib import redirect_stdout
4
+
5
+
6
# Pattern matching ANSI terminal escape sequences. Borrowed from click:
_ansi_re = re.compile(r'\033\[[;?0-9]*[a-zA-Z]')


def strip_ansi_codes(value: str) -> str:
    """Return *value* with every ANSI escape sequence removed."""
    return _ansi_re.sub('', value)
12
+
13
+
14
class TeeStdout:
    """
    Minimal file-like object: forwards every write unchanged to the original
    stdout and writes an ANSI-stripped copy to `file` (the plain-text log).
    """

    def __init__(self, file):
        self.file = file
        # Capture whatever stdout is *now*, before any redirect is installed.
        self.stdout = sys.stdout

    def write(self, data):
        self.stdout.write(data)
        # The log file receives plain text without terminal escape sequences:
        self.file.write(strip_ansi_codes(data))

    def flush(self):
        self.stdout.flush()
        self.file.flush()
26
+
27
+
28
class TeeStdoutContext:
    """
    Context manager that tees stdout into a log file: inside the `with`
    block, all stdout output is also written (ANSI codes stripped) to
    `file_path`.
    """

    def __init__(self, file_path):
        self.file_path = file_path

    def __enter__(self):
        # Explicit utf-8: console output may contain characters that the
        # platform default encoding (e.g. cp1252 on Windows) cannot encode,
        # which would otherwise raise UnicodeEncodeError mid-run.
        self.file = open(self.file_path, 'w', encoding='utf-8')
        self.redirect = redirect_stdout(TeeStdout(self.file))
        self.redirect.__enter__()
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        try:
            self.redirect.__exit__(exc_type, exc_val, exc_tb)
        finally:
            # Always close the log file, even if restoring stdout fails.
            self.file.close()
File without changes