PyHardLinkBackup 1.5.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- PyHardLinkBackup/__init__.py +7 -0
- PyHardLinkBackup/__main__.py +10 -0
- PyHardLinkBackup/backup.py +261 -0
- PyHardLinkBackup/cli_app/__init__.py +41 -0
- PyHardLinkBackup/cli_app/phlb.py +123 -0
- PyHardLinkBackup/cli_dev/__init__.py +70 -0
- PyHardLinkBackup/cli_dev/benchmark.py +138 -0
- PyHardLinkBackup/cli_dev/code_style.py +12 -0
- PyHardLinkBackup/cli_dev/packaging.py +65 -0
- PyHardLinkBackup/cli_dev/shell_completion.py +23 -0
- PyHardLinkBackup/cli_dev/testing.py +52 -0
- PyHardLinkBackup/cli_dev/update_readme_history.py +33 -0
- PyHardLinkBackup/compare_backup.py +212 -0
- PyHardLinkBackup/constants.py +16 -0
- PyHardLinkBackup/logging_setup.py +124 -0
- PyHardLinkBackup/rebuild_databases.py +176 -0
- PyHardLinkBackup/tests/__init__.py +36 -0
- PyHardLinkBackup/tests/test_backup.py +628 -0
- PyHardLinkBackup/tests/test_compare_backup.py +86 -0
- PyHardLinkBackup/tests/test_doc_write.py +26 -0
- PyHardLinkBackup/tests/test_doctests.py +10 -0
- PyHardLinkBackup/tests/test_project_setup.py +46 -0
- PyHardLinkBackup/tests/test_readme.py +75 -0
- PyHardLinkBackup/tests/test_readme_history.py +9 -0
- PyHardLinkBackup/tests/test_rebuild_database.py +224 -0
- PyHardLinkBackup/utilities/__init__.py +0 -0
- PyHardLinkBackup/utilities/file_hash_database.py +62 -0
- PyHardLinkBackup/utilities/file_size_database.py +46 -0
- PyHardLinkBackup/utilities/filesystem.py +158 -0
- PyHardLinkBackup/utilities/humanize.py +39 -0
- PyHardLinkBackup/utilities/rich_utils.py +99 -0
- PyHardLinkBackup/utilities/sha256sums.py +61 -0
- PyHardLinkBackup/utilities/tee.py +40 -0
- PyHardLinkBackup/utilities/tests/__init__.py +0 -0
- PyHardLinkBackup/utilities/tests/test_file_hash_database.py +143 -0
- PyHardLinkBackup/utilities/tests/test_file_size_database.py +138 -0
- PyHardLinkBackup/utilities/tests/test_filesystem.py +126 -0
- PyHardLinkBackup/utilities/tyro_cli_shared_args.py +12 -0
- pyhardlinkbackup-1.5.0.dist-info/METADATA +600 -0
- pyhardlinkbackup-1.5.0.dist-info/RECORD +42 -0
- pyhardlinkbackup-1.5.0.dist-info/WHEEL +4 -0
- pyhardlinkbackup-1.5.0.dist-info/entry_points.txt +3 -0
|
@@ -0,0 +1,628 @@
|
|
|
1
|
+
import datetime
|
|
2
|
+
import logging
|
|
3
|
+
import os
|
|
4
|
+
import tempfile
|
|
5
|
+
import textwrap
|
|
6
|
+
import unittest
|
|
7
|
+
import zlib
|
|
8
|
+
from collections.abc import Iterable
|
|
9
|
+
from pathlib import Path
|
|
10
|
+
from unittest.mock import patch
|
|
11
|
+
|
|
12
|
+
from bx_py_utils.path import assert_is_file
|
|
13
|
+
from bx_py_utils.test_utils.assertion import assert_text_equal
|
|
14
|
+
from bx_py_utils.test_utils.datetime import parse_dt
|
|
15
|
+
from bx_py_utils.test_utils.log_utils import NoLogs
|
|
16
|
+
from bx_py_utils.test_utils.redirect import RedirectOut
|
|
17
|
+
from cli_base.cli_tools.test_utils.base_testcases import OutputMustCapturedTestCaseMixin
|
|
18
|
+
from freezegun import freeze_time
|
|
19
|
+
from tabulate import tabulate
|
|
20
|
+
|
|
21
|
+
from PyHardLinkBackup.backup import BackupResult, backup_tree
|
|
22
|
+
from PyHardLinkBackup.constants import CHUNK_SIZE
|
|
23
|
+
from PyHardLinkBackup.logging_setup import DEFAULT_CONSOLE_LOG_LEVEL, DEFAULT_LOG_FILE_LEVEL, LoggingManager
|
|
24
|
+
from PyHardLinkBackup.utilities.file_size_database import FileSizeDatabase
|
|
25
|
+
from PyHardLinkBackup.utilities.filesystem import copy_and_hash, iter_scandir_files
|
|
26
|
+
from PyHardLinkBackup.utilities.tests.test_file_hash_database import assert_hash_db_info
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
class SortedIterScandirFiles:
    """
    Deterministic replacement for iter_scandir_files(), used to keep tests stable.

    os.scandir() gives no guarantee about the order of the entries it returns.
    This wrapper collects everything iter_scandir_files() yields and hands the
    entries back ordered by their file name. It can be used as a context manager;
    entering returns the instance itself and leaving does nothing.
    """

    def __init__(self, path: Path, excludes: set):
        self.path, self.excludes = path, excludes

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        # Nothing to release and exceptions are never suppressed.
        pass

    def __iter__(self) -> Iterable[os.DirEntry]:
        # Materialize first: sorting needs all entries at once.
        entries = list(iter_scandir_files(self.path, self.excludes))
        entries.sort(key=lambda entry: entry.name)
        yield from entries
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
def set_file_times(path: Path, dt: datetime.datetime):
    """
    Give every file found below *path* the same access and modification time.

    An aware *dt* is shifted to UTC and its tzinfo is dropped before the
    timestamp is taken; a naive *dt* is used unchanged.
    """
    if dt.tzinfo is not None:
        utc_dt = dt.astimezone(datetime.timezone.utc)
        dt = utc_dt.replace(tzinfo=None)

    # NOTE(review): datetime.timestamp() interprets a naive value as local time,
    # so the UTC wall clock becomes the *local* wall clock of the files.
    # Presumably intentional, because _fs_tree_overview() renders the times via
    # the local-time datetime.fromtimestamp() - confirm before changing.
    stamp = dt.timestamp()
    access_and_modified = (stamp, stamp)

    with NoLogs(logger_name=''):
        for entry in iter_scandir_files(path, excludes=set()):
            os.utime(entry.path, access_and_modified)
|
|
58
|
+
|
|
59
|
+
|
|
60
|
+
def _fs_tree_overview(root: Path) -> str:
    """
    Render all files below *root* as a plain-text table, sorted by relative path.

    Columns: path, birthtime (HH:MM:SS), type (symlink/hardlink/file), nlink,
    size and CRC32 of the content. Values that differ between test runs are
    replaced by '<mock>'; entries whose stat() raises FileNotFoundError
    (e.g. a dangling symlink) get '-' in all stat based columns.
    """
    placeholder = '<mock>'
    rows = []
    for entry in iter_scandir_files(root, excludes=set()):
        entry_path = Path(entry.path)
        try:
            stat_result = entry.stat()
        except FileNotFoundError:
            birthtime = nlink = size = crc32 = '-'
        else:
            # Log and summary files have flaky content:
            volatile = entry.name.endswith(('-backup.log', '-summary.txt'))
            if volatile:
                size = crc32 = placeholder
            else:
                checksum = zlib.crc32(entry_path.read_bytes())
                crc32 = format(checksum, '08x')
                size = stat_result.st_size

            nlink = stat_result.st_nlink

            if volatile or entry.name == 'SHA256SUMS':
                birthtime = placeholder
            else:
                # Not every platform offers st_birthtime: fall back to the mtime.
                timestamp = getattr(stat_result, 'st_birthtime', stat_result.st_mtime)
                birthtime = datetime.datetime.fromtimestamp(timestamp).strftime('%H:%M:%S')

        if entry.is_symlink():
            kind = 'symlink'
        elif nlink > 1:
            kind = 'hardlink'
        else:
            kind = 'file'

        rows.append([str(entry_path.relative_to(root)), birthtime, kind, nlink, size, crc32])

    rows.sort()
    column_names = ['path', 'birthtime', 'type', 'nlink', 'size', 'CRC32']
    return tabulate(rows, headers=column_names, tablefmt='plain')
|
|
110
|
+
|
|
111
|
+
|
|
112
|
+
def assert_fs_tree_overview(root: Path, expected_overview: str):
    """
    Compare the overview table of *root* with *expected_overview*.

    The expectation is dedented and stripped first, so it can be written as an
    indented triple-quoted string. The failure message contains the actual table.
    """
    wanted = textwrap.dedent(expected_overview).strip()
    actual = _fs_tree_overview(root)
    failure_hint = f'Filesystem tree overview does not match expected overview.\n\n{actual}\n\n'
    assert_text_equal(actual, wanted, msg=failure_hint)
|
|
120
|
+
|
|
121
|
+
|
|
122
|
+
class BackupTreeTestCase(
    OutputMustCapturedTestCaseMixin,
    unittest.TestCase,
):
    """
    End-to-end tests of backup_tree() against real temporary directory trees.

    Every test builds a source tree, pins the file times, runs backup_tree()
    under a frozen clock with captured output and then checks the returned
    BackupResult, the resulting filesystem tree and the hash database listing.
    """

    def test_happy_path(self):
        """Three consecutive backups: hardlinking, counters and hash database entries."""
        with tempfile.TemporaryDirectory() as temp_dir:
            temp_path = Path(temp_dir).resolve()

            src_root = temp_path / 'source'
            backup_root = temp_path / 'backup'

            src_root.mkdir()
            backup_root.mkdir()

            file1_path = src_root / 'file2.txt'
            file1_path.write_text('This is file 1')

            (src_root / 'symlink2file1').symlink_to(file1_path)
            os.link(file1_path, src_root / 'hardlink2file1')

            sub_dir = src_root / 'subdir'
            sub_dir.mkdir()
            (sub_dir / 'file.txt').write_text('This is file in subdir')

            # Only files bigger than MIN_SIZE will be considered for hardlinking:
            (src_root / 'min_sized_file1.bin').write_bytes(b'X' * FileSizeDatabase.MIN_SIZE)

            # Same content and big enough to be considered for hardlinking:
            (src_root / 'min_sized_file2.bin').write_bytes(b'X' * FileSizeDatabase.MIN_SIZE)

            # Larger than CHUNK_SIZE file will be handled differently:
            (src_root / 'large_file1.bin').write_bytes(b'Y' * (CHUNK_SIZE + 1))

            excluded_dir = src_root / '.cache'
            excluded_dir.mkdir()
            (excluded_dir / 'tempfile.tmp').write_text('Temporary file that should be excluded')

            # FIXME: freezegun doesn't handle this, see: https://github.com/spulec/freezegun/issues/392
            # Set modification times to a fixed time for easier testing:
            set_file_times(src_root, dt=parse_dt('2026-01-01T12:00:00+0000'))

            #######################################################################################
            # Create first backup:

            with (
                patch('PyHardLinkBackup.backup.iter_scandir_files', SortedIterScandirFiles),
                freeze_time('2026-01-01T12:34:56Z', auto_tick_seconds=0),
                RedirectOut() as redirected_out,
            ):
                result = backup_tree(
                    src_root=src_root,
                    backup_root=backup_root,
                    excludes=('.cache',),
                    log_manager=LoggingManager(
                        console_level='info',
                        file_level=DEFAULT_LOG_FILE_LEVEL,
                    ),
                )
            self.assertEqual(redirected_out.stderr, '')
            self.assertIn('Backup complete', redirected_out.stdout)
            backup_dir = result.backup_dir
            self.assertEqual(
                str(Path(backup_dir).relative_to(temp_path)),
                'backup/source/2026-01-01-123456',
            )
            log_file = result.log_file
            self.assertEqual(
                str(Path(log_file).relative_to(temp_path)),
                'backup/source/2026-01-01-123456-backup.log',
            )
            self.assertEqual(
                result,
                BackupResult(
                    backup_dir=backup_dir,
                    log_file=log_file,
                    backup_count=7,
                    backup_size=67110929,
                    symlink_files=1,
                    hardlinked_files=1,
                    hardlinked_size=1000,
                    copied_files=5,
                    copied_size=67109915,
                    copied_small_files=3,
                    copied_small_size=50,
                    error_count=0,
                ),
                redirected_out.stdout,
            )

            # The sources:
            with self.assertLogs('PyHardLinkBackup', level=logging.DEBUG):
                assert_fs_tree_overview(
                    root=src_root,
                    expected_overview="""
                        path birthtime type nlink size CRC32
                        .cache/tempfile.tmp 12:00:00 file 1 38 41d7a2c9
                        file2.txt 12:00:00 hardlink 2 14 8a11514a
                        hardlink2file1 12:00:00 hardlink 2 14 8a11514a
                        large_file1.bin 12:00:00 file 1 67108865 9671eaac
                        min_sized_file1.bin 12:00:00 file 1 1000 f0d93de4
                        min_sized_file2.bin 12:00:00 file 1 1000 f0d93de4
                        subdir/file.txt 12:00:00 file 1 22 c0167e63
                        symlink2file1 12:00:00 symlink 2 14 8a11514a
                    """,
                )
            # The backup:
            #  * /.cache/ -> excluded
            #  * min_sized_file1.bin and min_sized_file2.bin -> hardlinked
            with self.assertLogs('PyHardLinkBackup', level=logging.DEBUG):
                assert_fs_tree_overview(
                    root=backup_dir,
                    expected_overview="""
                        path birthtime type nlink size CRC32
                        SHA256SUMS <mock> file 1 411 b02da51e
                        file2.txt 12:00:00 file 1 14 8a11514a
                        hardlink2file1 12:00:00 file 1 14 8a11514a
                        large_file1.bin 12:00:00 file 1 67108865 9671eaac
                        min_sized_file1.bin 12:00:00 hardlink 2 1000 f0d93de4
                        min_sized_file2.bin 12:00:00 hardlink 2 1000 f0d93de4
                        subdir/SHA256SUMS <mock> file 1 75 1af5ecc7
                        subdir/file.txt 12:00:00 file 1 22 c0167e63
                        symlink2file1 12:00:00 symlink 2 14 8a11514a
                    """,
                )

            # Let's check our FileHashDatabase:
            with self.assertLogs('PyHardLinkBackup', level=logging.DEBUG):
                assert_hash_db_info(
                    backup_root=backup_root,
                    expected="""
                        bb/c4/bbc4de2ca238d1… -> source/2026-01-01-123456/min_sized_file1.bin
                        e6/37/e6374ac11d9049… -> source/2026-01-01-123456/large_file1.bin
                    """,
                )

            #######################################################################################
            # Backup again with new added files:

            # New small file with different size and different content:
            (src_root / 'small_file_newA.txt').write_text('A new file')

            # Add small file that size exists, but has different content:
            (src_root / 'small_file_newB.txt').write_text('This is file 2')

            # Bigger file with new size and new content:
            (src_root / 'min_sized_file_newA.bin').write_bytes(b'A' * (FileSizeDatabase.MIN_SIZE + 1))

            # Bigger file with existing size, but different content:
            (src_root / 'min_sized_file_newB.bin').write_bytes(b'B' * FileSizeDatabase.MIN_SIZE)

            # Add a larger than CHUNK_SIZE file with the same size and the same content
            # as large_file1.bin (it is expected to be hardlinked below):
            (src_root / 'large_file2.bin').write_bytes(b'Y' * (CHUNK_SIZE + 1))

            # FIXME: freezegun doesn't handle this, see: https://github.com/spulec/freezegun/issues/392
            # Set modification times to a fixed time for easier testing:
            set_file_times(src_root, dt=parse_dt('2026-01-01T12:00:00+0000'))

            with (
                patch('PyHardLinkBackup.backup.iter_scandir_files', SortedIterScandirFiles),
                freeze_time('2026-01-02T12:34:56Z', auto_tick_seconds=0),
                RedirectOut() as redirected_out,
            ):
                result = backup_tree(
                    src_root=src_root,
                    backup_root=backup_root,
                    excludes=('.cache',),
                    log_manager=LoggingManager(
                        console_level='info',
                        file_level=DEFAULT_LOG_FILE_LEVEL,
                    ),
                )
            self.assertEqual(redirected_out.stderr, '')
            self.assertIn('Backup complete', redirected_out.stdout)
            backup_dir = result.backup_dir
            self.assertEqual(
                str(Path(backup_dir).relative_to(temp_path)),
                'backup/source/2026-01-02-123456',
            )
            # The second backup:
            #  * /.cache/ -> excluded
            #  * min_sized_file1.bin and min_sized_file2.bin -> hardlinked
            with self.assertLogs('PyHardLinkBackup', level=logging.DEBUG):
                assert_fs_tree_overview(
                    root=backup_dir,
                    expected_overview="""
                        path birthtime type nlink size CRC32
                        SHA256SUMS <mock> file 1 845 6596856a
                        file2.txt 12:00:00 file 1 14 8a11514a
                        hardlink2file1 12:00:00 file 1 14 8a11514a
                        large_file1.bin 12:00:00 hardlink 3 67108865 9671eaac
                        large_file2.bin 12:00:00 hardlink 3 67108865 9671eaac
                        min_sized_file1.bin 12:00:00 hardlink 4 1000 f0d93de4
                        min_sized_file2.bin 12:00:00 hardlink 4 1000 f0d93de4
                        min_sized_file_newA.bin 12:00:00 file 1 1001 a48f0e33
                        min_sized_file_newB.bin 12:00:00 file 1 1000 7d9c564d
                        small_file_newA.txt 12:00:00 file 1 10 76d1acf1
                        small_file_newB.txt 12:00:00 file 1 14 131800f0
                        subdir/SHA256SUMS <mock> file 1 75 1af5ecc7
                        subdir/file.txt 12:00:00 file 1 22 c0167e63
                        symlink2file1 12:00:00 symlink 2 14 8a11514a
                    """,
                )
            self.assertEqual(
                result,
                BackupResult(
                    backup_dir=backup_dir,
                    log_file=result.log_file,
                    backup_count=12,
                    backup_size=134221819,
                    symlink_files=1,
                    hardlinked_files=4,
                    hardlinked_size=134219730,
                    copied_files=7,
                    copied_size=2075,
                    copied_small_files=5,
                    copied_small_size=74,
                    error_count=0,
                ),
                redirected_out.stdout,
            )

            # The FileHashDatabase keeps the old entries and gained the two new min sized files:
            with self.assertLogs('PyHardLinkBackup', level=logging.DEBUG):
                assert_hash_db_info(
                    backup_root=backup_root,
                    expected="""
                        23/d2/23d2ce40d26211… -> source/2026-01-02-123456/min_sized_file_newA.bin
                        9a/56/9a567077114134… -> source/2026-01-02-123456/min_sized_file_newB.bin
                        bb/c4/bbc4de2ca238d1… -> source/2026-01-01-123456/min_sized_file1.bin
                        e6/37/e6374ac11d9049… -> source/2026-01-01-123456/large_file1.bin
                    """,
                )

            #######################################################################################
            # Don't create broken hardlinks!

            """DocWrite: README.md ## FileHashDatabase - Missing hardlink target file
            If a hardlink source from a old backup is missing, we cannot create a hardlink to it.
            But it still works to hardlink same files within the current backup.
            """

            # Let's remove one of the files used for hardlinking from the first backup:
            min_sized_file1_bak_path = backup_root / 'source/2026-01-01-123456/min_sized_file1.bin'
            assert_is_file(min_sized_file1_bak_path)
            min_sized_file1_bak_path.unlink()

            # Backup again:
            with (
                patch('PyHardLinkBackup.backup.iter_scandir_files', SortedIterScandirFiles),
                freeze_time('2026-01-03T12:34:56Z', auto_tick_seconds=0),
                RedirectOut() as redirected_out,
            ):
                result = backup_tree(
                    src_root=src_root,
                    backup_root=backup_root,
                    excludes=('.cache',),
                    log_manager=LoggingManager(
                        console_level=DEFAULT_CONSOLE_LOG_LEVEL,
                        file_level=DEFAULT_LOG_FILE_LEVEL,
                    ),
                )
            self.assertEqual(redirected_out.stderr, '')
            self.assertIn('Backup complete', redirected_out.stdout)
            backup_dir = result.backup_dir

            # Note: min_sized_file1.bin and min_sized_file2.bin are hardlinked,
            # but not with the first backup anymore! So it's only nlink=2 now!
            with self.assertLogs('PyHardLinkBackup', level=logging.DEBUG):
                assert_fs_tree_overview(
                    root=backup_dir,
                    expected_overview="""
                        path birthtime type nlink size CRC32
                        SHA256SUMS <mock> file 1 845 6596856a
                        file2.txt 12:00:00 file 1 14 8a11514a
                        hardlink2file1 12:00:00 file 1 14 8a11514a
                        large_file1.bin 12:00:00 hardlink 5 67108865 9671eaac
                        large_file2.bin 12:00:00 hardlink 5 67108865 9671eaac
                        min_sized_file1.bin 12:00:00 hardlink 2 1000 f0d93de4
                        min_sized_file2.bin 12:00:00 hardlink 2 1000 f0d93de4
                        min_sized_file_newA.bin 12:00:00 hardlink 2 1001 a48f0e33
                        min_sized_file_newB.bin 12:00:00 hardlink 2 1000 7d9c564d
                        small_file_newA.txt 12:00:00 file 1 10 76d1acf1
                        small_file_newB.txt 12:00:00 file 1 14 131800f0
                        subdir/SHA256SUMS <mock> file 1 75 1af5ecc7
                        subdir/file.txt 12:00:00 file 1 22 c0167e63
                        symlink2file1 12:00:00 symlink 2 14 8a11514a
                    """,
                )

            self.assertEqual(
                result,
                BackupResult(
                    backup_dir=backup_dir,
                    log_file=result.log_file,
                    backup_count=12,
                    backup_size=134221819,
                    symlink_files=1,
                    hardlinked_files=5,
                    hardlinked_size=134220731,
                    copied_files=6,
                    copied_size=1074,
                    copied_small_files=5,
                    copied_small_size=74,
                    error_count=0,
                ),
            )

            # Note: min_sized_file1.bin is now from the 2026-01-03 backup!
            self.assertEqual(backup_dir.name, '2026-01-03-123456')  # Latest backup dir name
            with self.assertLogs('PyHardLinkBackup', level=logging.DEBUG):
                assert_hash_db_info(
                    backup_root=backup_root,
                    expected="""
                        23/d2/23d2ce40d26211… -> source/2026-01-02-123456/min_sized_file_newA.bin
                        9a/56/9a567077114134… -> source/2026-01-02-123456/min_sized_file_newB.bin
                        bb/c4/bbc4de2ca238d1… -> source/2026-01-03-123456/min_sized_file1.bin
                        e6/37/e6374ac11d9049… -> source/2026-01-01-123456/large_file1.bin
                    """,
                )

    def test_symlink(self):
        """Symlinks (inside, outside and dangling) are backed up as symlinks."""
        with tempfile.TemporaryDirectory() as temp_dir:
            temp_path = Path(temp_dir).resolve()

            src_root = temp_path / 'src'
            backup_root = temp_path / 'bak'

            src_root.mkdir()
            backup_root.mkdir()

            source_file_path = src_root / 'source_file.txt'
            source_file_path.write_text('File in the "source" directory.')

            symlink2source_file_path = src_root / 'symlink2source'
            symlink2source_file_path.symlink_to(source_file_path)
            self.assertEqual(symlink2source_file_path.read_text(), 'File in the "source" directory.')

            outside_file_path = temp_path / 'outside_file.txt'
            outside_file_path.write_text('File outside the "source" directory!')

            symlink2outside_file_path = src_root / 'symlink2outside'
            symlink2outside_file_path.symlink_to(outside_file_path)
            self.assertEqual(symlink2outside_file_path.read_text(), 'File outside the "source" directory!')

            # FIXME: freezegun doesn't handle this, see: https://github.com/spulec/freezegun/issues/392
            # Set modification times to a fixed time for easier testing:
            set_file_times(src_root, dt=parse_dt('2026-01-01T12:00:00+0000'))

            broken_symlink_path = src_root / 'broken_symlink'
            broken_symlink_path.symlink_to(temp_path / 'not/existing/file.txt')
            # Really verify the precondition: the bare is_symlink() call threw its result away.
            self.assertTrue(broken_symlink_path.is_symlink())

            #######################################################################################
            # Create first backup:

            with (
                freeze_time('2026-01-01T12:34:56Z', auto_tick_seconds=0),
                RedirectOut() as redirected_out,
            ):
                result = backup_tree(
                    src_root=src_root,
                    backup_root=backup_root,
                    excludes=(),
                    log_manager=LoggingManager(
                        console_level=DEFAULT_CONSOLE_LOG_LEVEL,
                        file_level=DEFAULT_LOG_FILE_LEVEL,
                    ),
                )
            self.assertEqual(redirected_out.stderr, '')
            self.assertIn('Backup complete', redirected_out.stdout)
            backup_dir1 = result.backup_dir
            self.assertEqual(
                str(Path(backup_dir1).relative_to(temp_path)),
                'bak/src/2026-01-01-123456',
            )

            with self.assertLogs('PyHardLinkBackup', level=logging.DEBUG):
                """DocWrite: README.md # PyHardLinkBackup - Notes
                A log file is stored in the backup directory. e.g.:
                * `bak/src/2026-01-01-123456-backup.log`

                A finished backup also creates a summary file. e.g.:
                * `bak/src/2026-01-01-123456-summary.txt`
                """
                assert_fs_tree_overview(
                    root=temp_path,  # The complete overview of source + backup and outside file
                    expected_overview="""
                        path birthtime type nlink size CRC32
                        bak/src/2026-01-01-123456-backup.log <mock> file 1 <mock> <mock>
                        bak/src/2026-01-01-123456-summary.txt <mock> file 1 <mock> <mock>
                        bak/src/2026-01-01-123456/SHA256SUMS <mock> file 1 82 c03fd60e
                        bak/src/2026-01-01-123456/broken_symlink - symlink - - -
                        bak/src/2026-01-01-123456/source_file.txt 12:00:00 file 1 31 9309a10c
                        bak/src/2026-01-01-123456/symlink2outside 12:00:00 symlink 1 36 24b5bf4c
                        bak/src/2026-01-01-123456/symlink2source 12:00:00 symlink 1 31 9309a10c
                        outside_file.txt 12:00:00 file 1 36 24b5bf4c
                        src/broken_symlink - symlink - - -
                        src/source_file.txt 12:00:00 file 1 31 9309a10c
                        src/symlink2outside 12:00:00 symlink 1 36 24b5bf4c
                        src/symlink2source 12:00:00 symlink 1 31 9309a10c
                    """,
                )

            self.assertEqual(
                result,
                BackupResult(
                    backup_dir=backup_dir1,
                    log_file=result.log_file,
                    backup_count=4,
                    backup_size=98,
                    symlink_files=3,
                    hardlinked_files=0,
                    hardlinked_size=0,
                    copied_files=1,
                    copied_size=31,
                    copied_small_files=1,
                    copied_small_size=31,
                    error_count=0,
                ),
            )

            """DocWrite: README.md ## backup implementation - Symlinks
            Symlinks are copied as symlinks in the backup."""
            self.assertEqual(
                (backup_dir1 / 'symlink2outside').read_text(),
                'File outside the "source" directory!',
            )
            self.assertEqual(
                (backup_dir1 / 'symlink2source').read_text(),
                'File in the "source" directory.',
            )
            self.assertEqual((backup_dir1 / 'symlink2outside').readlink(), outside_file_path)
            self.assertEqual((backup_dir1 / 'symlink2source').readlink(), source_file_path)

            """DocWrite: README.md ## backup implementation - Symlinks
            Symlinks are not stored in our FileHashDatabase, because they are not considered for hardlinking."""
            with self.assertLogs('PyHardLinkBackup', level=logging.DEBUG):
                assert_hash_db_info(backup_root=backup_root, expected='')

    def test_error_handling(self):
        """A failing copy of one file is logged and counted, the other files are still backed up."""
        with tempfile.TemporaryDirectory() as temp_dir:
            temp_path = Path(temp_dir).resolve()

            src_root = temp_path / 'source'
            backup_root = temp_path / 'backup'

            src_root.mkdir()
            backup_root.mkdir()

            (src_root / 'file1.txt').write_text('File 1')
            (src_root / 'file2.txt').write_text('File 2')
            (src_root / 'file3.txt').write_text('File 3')

            # Set modification times to a fixed time for easier testing:
            set_file_times(src_root, dt=parse_dt('2026-01-01T12:00:00+0000'))

            def mocked_copy_and_hash(src: Path, dst: Path):
                # Fail for exactly one file, delegate to the real implementation otherwise:
                if src.name == 'file2.txt':
                    raise PermissionError('Bam!')
                else:
                    return copy_and_hash(src, dst)

            with (
                patch('PyHardLinkBackup.backup.iter_scandir_files', SortedIterScandirFiles),
                patch('PyHardLinkBackup.backup.copy_and_hash', mocked_copy_and_hash),
                freeze_time('2026-01-01T12:34:56Z', auto_tick_seconds=0),
                RedirectOut() as redirected_out,
            ):
                result = backup_tree(
                    src_root=src_root,
                    backup_root=backup_root,
                    excludes=('.cache',),
                    log_manager=LoggingManager(
                        console_level=DEFAULT_CONSOLE_LOG_LEVEL,
                        file_level=DEFAULT_LOG_FILE_LEVEL,
                    ),
                )
            self.assertEqual(redirected_out.stderr, '')
            self.assertIn('Backup complete', redirected_out.stdout)
            self.assertIn('Errors during backup:', redirected_out.stdout)

            log_file = result.log_file
            assert_is_file(log_file)
            self.assertEqual(str(log_file), f'{temp_path}/backup/source/2026-01-01-123456-backup.log')
            logs = log_file.read_text()
            self.assertIn(
                f'Backup {src_root / "file2.txt"} PermissionError: Bam!\n',
                logs,
            )
            self.assertIn('\nTraceback (most recent call last):\n', logs)
            self.assertEqual(
                result,
                BackupResult(
                    backup_dir=result.backup_dir,
                    log_file=log_file,
                    backup_count=3,
                    backup_size=18,
                    symlink_files=0,
                    hardlinked_files=0,
                    hardlinked_size=0,
                    copied_files=2,
                    copied_size=12,
                    copied_small_files=2,
                    copied_small_size=12,
                    error_count=1,
                ),
            )
|