PyHardLinkBackup 1.8.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (45) hide show
  1. PyHardLinkBackup/__init__.py +7 -0
  2. PyHardLinkBackup/__main__.py +10 -0
  3. PyHardLinkBackup/backup.py +297 -0
  4. PyHardLinkBackup/cli_app/__init__.py +41 -0
  5. PyHardLinkBackup/cli_app/phlb.py +136 -0
  6. PyHardLinkBackup/cli_dev/__init__.py +70 -0
  7. PyHardLinkBackup/cli_dev/__main__.py +10 -0
  8. PyHardLinkBackup/cli_dev/benchmark.py +138 -0
  9. PyHardLinkBackup/cli_dev/code_style.py +12 -0
  10. PyHardLinkBackup/cli_dev/debugging.py +47 -0
  11. PyHardLinkBackup/cli_dev/packaging.py +62 -0
  12. PyHardLinkBackup/cli_dev/shell_completion.py +23 -0
  13. PyHardLinkBackup/cli_dev/testing.py +52 -0
  14. PyHardLinkBackup/cli_dev/update_readme_history.py +33 -0
  15. PyHardLinkBackup/compare_backup.py +259 -0
  16. PyHardLinkBackup/constants.py +18 -0
  17. PyHardLinkBackup/logging_setup.py +124 -0
  18. PyHardLinkBackup/rebuild_databases.py +217 -0
  19. PyHardLinkBackup/tests/__init__.py +36 -0
  20. PyHardLinkBackup/tests/test_backup.py +1167 -0
  21. PyHardLinkBackup/tests/test_compare_backup.py +167 -0
  22. PyHardLinkBackup/tests/test_doc_write.py +26 -0
  23. PyHardLinkBackup/tests/test_doctests.py +10 -0
  24. PyHardLinkBackup/tests/test_project_setup.py +46 -0
  25. PyHardLinkBackup/tests/test_readme.py +75 -0
  26. PyHardLinkBackup/tests/test_readme_history.py +9 -0
  27. PyHardLinkBackup/tests/test_rebuild_database.py +266 -0
  28. PyHardLinkBackup/utilities/__init__.py +0 -0
  29. PyHardLinkBackup/utilities/file_hash_database.py +62 -0
  30. PyHardLinkBackup/utilities/file_size_database.py +46 -0
  31. PyHardLinkBackup/utilities/filesystem.py +257 -0
  32. PyHardLinkBackup/utilities/humanize.py +39 -0
  33. PyHardLinkBackup/utilities/rich_utils.py +237 -0
  34. PyHardLinkBackup/utilities/sha256sums.py +61 -0
  35. PyHardLinkBackup/utilities/tee.py +40 -0
  36. PyHardLinkBackup/utilities/tests/__init__.py +0 -0
  37. PyHardLinkBackup/utilities/tests/test_file_hash_database.py +153 -0
  38. PyHardLinkBackup/utilities/tests/test_file_size_database.py +151 -0
  39. PyHardLinkBackup/utilities/tests/test_filesystem.py +167 -0
  40. PyHardLinkBackup/utilities/tests/unittest_utilities.py +78 -0
  41. PyHardLinkBackup/utilities/tyro_cli_shared_args.py +29 -0
  42. pyhardlinkbackup-1.8.1.dist-info/METADATA +700 -0
  43. pyhardlinkbackup-1.8.1.dist-info/RECORD +45 -0
  44. pyhardlinkbackup-1.8.1.dist-info/WHEEL +4 -0
  45. pyhardlinkbackup-1.8.1.dist-info/entry_points.txt +2 -0
@@ -0,0 +1,1167 @@
1
+ import datetime
2
+ import logging
3
+ import os
4
+ import shutil
5
+ import textwrap
6
+ import unittest
7
+ import zlib
8
+ from collections.abc import Iterable
9
+ from pathlib import Path
10
+ from unittest.mock import patch
11
+
12
+ from bx_py_utils.path import assert_is_file
13
+ from bx_py_utils.test_utils.assertion import assert_text_equal
14
+ from bx_py_utils.test_utils.datetime import parse_dt
15
+ from bx_py_utils.test_utils.log_utils import NoLogs
16
+ from bx_py_utils.test_utils.redirect import RedirectOut
17
+ from freezegun import freeze_time
18
+ from tabulate import tabulate
19
+
20
+ from PyHardLinkBackup.backup import BackupResult, backup_tree
21
+ from PyHardLinkBackup.logging_setup import DEFAULT_LOG_FILE_LEVEL, LoggingManager, LogLevelLiteral
22
+ from PyHardLinkBackup.tests.test_compare_backup import assert_compare_backup
23
+ from PyHardLinkBackup.utilities.file_size_database import FileSizeDatabase
24
+ from PyHardLinkBackup.utilities.filesystem import copy_and_hash, iter_scandir_files
25
+ from PyHardLinkBackup.utilities.rich_utils import DisplayFileTreeProgress, NoopProgress
26
+ from PyHardLinkBackup.utilities.tests.test_file_hash_database import assert_hash_db_info
27
+ from PyHardLinkBackup.utilities.tests.unittest_utilities import (
28
+ CollectOpenFiles,
29
+ PyHardLinkBackupTestCaseMixin,
30
+ )
31
+
32
+
33
class SortedIterScandirFiles:
    """
    Deterministic drop-in replacement for iter_scandir_files() in tests.

    os.scandir() gives no ordering guarantee, which would make order-sensitive
    assertions flaky. This wrapper collects all entries produced by
    iter_scandir_files() and yields them ordered by their entry name
    (the bare file name, not the full path).

    It can be used as a context manager and as an iterable.
    """

    def __init__(self, **iter_scandir_files_kwargs):
        # Keep the keyword arguments; they are passed on unchanged when iterating.
        self.iter_scandir_files_kwargs = iter_scandir_files_kwargs

    def __enter__(self):
        return self

    def __iter__(self) -> Iterable[os.DirEntry]:
        # The scan is started lazily on first iteration, then fully consumed
        # so that the entries can be ordered before anything is yielded.
        entries = list(iter_scandir_files(**self.iter_scandir_files_kwargs))
        entries.sort(key=lambda entry: entry.name)
        for entry in entries:
            yield entry

    def __exit__(self, exc_type, exc_val, exc_tb):
        # Nothing to clean up; exceptions are not suppressed.
        return None
51
+
52
+
53
def set_file_times(path: Path, dt: datetime.datetime):
    """
    Set access and modification time of every file found below *path* to *dt*.

    A timezone-aware *dt* is first converted to its UTC wall-clock value and
    made naive. datetime.timestamp() treats a naive datetime as local time,
    so the resulting timestamp shows the same wall-clock time when it is
    rendered in local time again.
    NOTE(review): presumably intended to keep the rendered times independent
    of the machine's timezone - confirm.

    Files that vanish or cannot be resolved (e.g. broken symlinks) are skipped.
    """
    if dt.tzinfo is None:
        naive_dt = dt
    else:
        # Move dt to UTC and drop the timezone info:
        naive_dt = dt.astimezone(datetime.timezone.utc).replace(tzinfo=None)

    timestamp = naive_dt.timestamp()
    times = (timestamp, timestamp)  # (atime, mtime)

    with NoLogs(logger_name=''):
        entries = iter_scandir_files(
            path=path,
            one_file_system=False,
            src_device_id=None,
            excludes=set(),
        )
        for entry in entries:
            try:
                os.utime(entry.path, times)
            except FileNotFoundError:
                # e.g.: broken symlink ;)
                continue
70
+
71
+
72
def _fs_tree_overview(root: Path) -> str:
    """
    Build a plain-text table describing all files found below *root*.

    Each row contains: path relative to *root*, birthtime (HH:MM:SS, falling
    back to mtime if st_birthtime is unavailable), type (file / hardlink /
    symlink / dir-symlink), nlink, size and the CRC32 of the content.
    Values that are unstable between runs are replaced by '<mock>';
    values that cannot be determined are shown as '-'.

    Raises RuntimeError if the scan yields a real (non-symlink) directory.
    """
    headers = ['path', 'birthtime', 'type', 'nlink', 'size', 'CRC32']
    rows = []

    scan = iter_scandir_files(
        path=root,
        one_file_system=False,
        src_device_id=None,
        excludes=set(),
    )
    for entry in scan:
        entry_path = Path(entry.path)
        checksum = '-'

        try:
            stat_result = entry.stat()
        except FileNotFoundError:
            # e.g. a broken symlink: nothing can be read from the target
            nlink = size = birthtime = '-'
        else:
            nlink = stat_result.st_nlink
            size = stat_result.st_size
            created = getattr(stat_result, 'st_birthtime', stat_result.st_mtime)
            birthtime = datetime.datetime.fromtimestamp(created).strftime('%H:%M:%S')

            if entry.is_file():
                is_log_file = entry.name.endswith(('-backup.log', '-summary.txt'))
                if is_log_file:
                    # flaky content!
                    checksum = size = '<mock>'
                else:
                    checksum = f'{zlib.crc32(entry_path.read_bytes()):08x}'

                # These files are written during the backup run itself,
                # so their timestamp is not the fixed test time:
                if is_log_file or entry.name == 'SHA256SUMS':
                    birthtime = '<mock>'

        if entry_path.is_dir():
            if not entry.is_symlink():
                raise RuntimeError(f'Found directory in file scan: {entry_path}')
            kind = 'dir-symlink'
            size = '-'
            # directories have different nlink values on different OSes:
            nlink = '<mock>'
        elif entry.is_symlink():
            kind = 'symlink'
        else:
            kind = 'hardlink' if nlink > 1 else 'file'

        rows.append([str(entry_path.relative_to(root)), birthtime, kind, nlink, size, checksum])

    # Sort rows (effectively by relative path) for a stable output:
    rows.sort()
    return tabulate(rows, headers=headers, tablefmt='plain')
134
+
135
+
136
def assert_fs_tree_overview(root: Path, expected_overview: str):
    """
    Assert that the file tree below *root* matches *expected_overview*.

    *expected_overview* may be an indented triple-quoted string: it is
    dedented and stripped before it is compared with the table generated by
    _fs_tree_overview(). On mismatch the actual table is included in the
    failure message.
    """
    expected = textwrap.dedent(expected_overview).strip()
    actual = _fs_tree_overview(root)
    failure_msg = f'Filesystem tree overview does not match expected overview.\n\n{actual}\n\n'
    assert_text_equal(actual, expected, msg=failure_msg)
144
+
145
+
146
+ class BackupTreeTestCase(
147
+ PyHardLinkBackupTestCaseMixin,
148
+ # TODO: OutputMustCapturedTestCaseMixin,
149
+ unittest.TestCase,
150
+ ):
151
+ def create_backup(
152
+ self,
153
+ *,
154
+ time_to_freeze: str,
155
+ backup_name=None,
156
+ log_file_level: LogLevelLiteral = DEFAULT_LOG_FILE_LEVEL,
157
+ ):
158
+ # FIXME: freezegun doesn't handle this, see: https://github.com/spulec/freezegun/issues/392
159
+ # Set modification times to a fixed time for easier testing:
160
+ set_file_times(self.src_root, dt=parse_dt('2026-01-01T12:00:00+0000'))
161
+
162
+ with (
163
+ patch('PyHardLinkBackup.backup.iter_scandir_files', SortedIterScandirFiles),
164
+ freeze_time(time_to_freeze, auto_tick_seconds=0),
165
+ RedirectOut() as redirected_out,
166
+ ):
167
+ result = backup_tree(
168
+ src_root=self.src_root,
169
+ backup_root=self.backup_root,
170
+ backup_name=backup_name,
171
+ one_file_system=True,
172
+ excludes=('.cache',),
173
+ log_manager=LoggingManager(
174
+ console_level='info',
175
+ file_level=log_file_level,
176
+ ),
177
+ )
178
+
179
+ return redirected_out, result
180
+
181
+ def test_happy_path(self):
182
+ file1_path = self.src_root / 'file2.txt'
183
+ file1_path.write_text('This is file 1')
184
+
185
+ (self.src_root / 'symlink2file1').symlink_to(file1_path)
186
+ os.link(file1_path, self.src_root / 'hardlink2file1')
187
+
188
+ sub_dir = self.src_root / 'subdir'
189
+ sub_dir.mkdir()
190
+ (sub_dir / 'file.txt').write_text('This is file in subdir')
191
+
192
+ # Only files bigger than MIN_SIZE will be considered for hardlinking:
193
+ (self.src_root / 'min_sized_file1.bin').write_bytes(b'X' * FileSizeDatabase.MIN_SIZE)
194
+
195
+ # Same content and big enough to be considered for hardlinking:
196
+ (self.src_root / 'min_sized_file2.bin').write_bytes(b'X' * FileSizeDatabase.MIN_SIZE)
197
+
198
+ # Files larger than CHUNK_SIZE will be handled differently:
199
+ SMALLER_TEST_CHUNK_SIZE = 1000
200
+ (self.src_root / 'large_file1.bin').write_bytes(b'Y' * (SMALLER_TEST_CHUNK_SIZE + 1))
201
+
202
+ excluded_dir = self.src_root / '.cache'
203
+ excluded_dir.mkdir()
204
+ (excluded_dir / 'tempfile.tmp').write_text('Temporary file that should be excluded')
205
+
206
+ #######################################################################################
207
+ # Create first backup:
208
+
209
+ with (
210
+ patch('PyHardLinkBackup.backup.CHUNK_SIZE', SMALLER_TEST_CHUNK_SIZE),
211
+ CollectOpenFiles(self.temp_path) as collector,
212
+ ):
213
+ redirected_out, result = self.create_backup(time_to_freeze='2026-01-01T12:34:56Z')
214
+
215
+ self.assertEqual(redirected_out.stderr, '')
216
+ self.assertIn('Backup complete', redirected_out.stdout)
217
+ backup_dir = result.backup_dir
218
+ self.assertEqual(
219
+ str(Path(backup_dir).relative_to(self.temp_path)),
220
+ 'backups/source/2026-01-01-123456',
221
+ )
222
+ log_file = result.log_file
223
+ self.assertEqual(
224
+ str(Path(log_file).relative_to(self.temp_path)),
225
+ 'backups/source/2026-01-01-123456-backup.log',
226
+ )
227
+ self.assertEqual(
228
+ result,
229
+ BackupResult(
230
+ backup_dir=backup_dir,
231
+ log_file=log_file,
232
+ backup_count=7,
233
+ backup_size=3065,
234
+ symlink_files=1,
235
+ hardlinked_files=1,
236
+ hardlinked_size=1000,
237
+ copied_files=5,
238
+ copied_size=2051,
239
+ copied_small_files=3,
240
+ copied_small_size=50,
241
+ error_count=0,
242
+ ),
243
+ redirected_out.stdout,
244
+ )
245
+
246
+ self.assertEqual(
247
+ collector.opened_for_read,
248
+ [
249
+ 'r backups/.phlb_test_link',
250
+ 'rb source/subdir/file.txt',
251
+ 'rb source/file2.txt',
252
+ 'rb source/hardlink2file1',
253
+ 'rb source/large_file1.bin',
254
+ 'rb source/min_sized_file1.bin',
255
+ 'rb source/min_sized_file2.bin',
256
+ 'r backups/.phlb/hash-lookup/bb/c4/bbc4de2ca238d1ec41fb622b75b5cf7d31a6d2ac92405043dd8f8220364fefc8',
257
+ ],
258
+ )
259
+ self.assertEqual(
260
+ collector.opened_for_write,
261
+ [
262
+ 'w backups/.phlb_test',
263
+ 'a backups/source/2026-01-01-123456-backup.log',
264
+ 'wb backups/source/2026-01-01-123456/subdir/file.txt',
265
+ 'a backups/source/2026-01-01-123456/subdir/SHA256SUMS',
266
+ 'wb backups/source/2026-01-01-123456/file2.txt',
267
+ 'a backups/source/2026-01-01-123456/SHA256SUMS',
268
+ 'wb backups/source/2026-01-01-123456/hardlink2file1',
269
+ 'a backups/source/2026-01-01-123456/SHA256SUMS',
270
+ 'wb backups/source/2026-01-01-123456/large_file1.bin',
271
+ 'w backups/.phlb/hash-lookup/e3/71/e3711d0eacddeb105af4ad9b0d63069d759acf32e49712663419e68dc294a94a',
272
+ 'a backups/source/2026-01-01-123456/SHA256SUMS',
273
+ 'wb backups/source/2026-01-01-123456/min_sized_file1.bin',
274
+ 'w backups/.phlb/hash-lookup/bb/c4/bbc4de2ca238d1ec41fb622b75b5cf7d31a6d2ac92405043dd8f8220364fefc8',
275
+ 'a backups/source/2026-01-01-123456/SHA256SUMS',
276
+ 'a backups/source/2026-01-01-123456/SHA256SUMS',
277
+ 'w backups/source/2026-01-01-123456-summary.txt',
278
+ ],
279
+ )
280
+
281
+ # The sources:
282
+ with self.assertLogs('PyHardLinkBackup', level=logging.DEBUG):
283
+ assert_fs_tree_overview(
284
+ root=self.src_root,
285
+ expected_overview="""
286
+ path birthtime type nlink size CRC32
287
+ .cache/tempfile.tmp 12:00:00 file 1 38 41d7a2c9
288
+ file2.txt 12:00:00 hardlink 2 14 8a11514a
289
+ hardlink2file1 12:00:00 hardlink 2 14 8a11514a
290
+ large_file1.bin 12:00:00 file 1 1001 fb3014ff
291
+ min_sized_file1.bin 12:00:00 file 1 1000 f0d93de4
292
+ min_sized_file2.bin 12:00:00 file 1 1000 f0d93de4
293
+ subdir/file.txt 12:00:00 file 1 22 c0167e63
294
+ symlink2file1 12:00:00 symlink 2 14 8a11514a
295
+ """,
296
+ )
297
+ # The backup:
298
+ # * /.cache/ -> excluded
299
+ # * min_sized_file1.bin and min_sized_file2.bin -> hardlinked
300
+ with self.assertLogs('PyHardLinkBackup', level=logging.DEBUG):
301
+ assert_fs_tree_overview(
302
+ root=backup_dir,
303
+ expected_overview="""
304
+ path birthtime type nlink size CRC32
305
+ SHA256SUMS <mock> file 1 411 a43ac4cb
306
+ file2.txt 12:00:00 file 1 14 8a11514a
307
+ hardlink2file1 12:00:00 file 1 14 8a11514a
308
+ large_file1.bin 12:00:00 file 1 1001 fb3014ff
309
+ min_sized_file1.bin 12:00:00 hardlink 2 1000 f0d93de4
310
+ min_sized_file2.bin 12:00:00 hardlink 2 1000 f0d93de4
311
+ subdir/SHA256SUMS <mock> file 1 75 1af5ecc7
312
+ subdir/file.txt 12:00:00 file 1 22 c0167e63
313
+ symlink2file1 12:00:00 symlink 2 14 8a11514a
314
+ """,
315
+ )
316
+
317
+ # Let's check our FileHashDatabase:
318
+ with self.assertLogs('PyHardLinkBackup', level=logging.DEBUG):
319
+ assert_hash_db_info(
320
+ backup_root=self.backup_root,
321
+ expected="""
322
+ bb/c4/bbc4de2ca238d1… -> source/2026-01-01-123456/min_sized_file1.bin
323
+ e3/71/e3711d0eacddeb… -> source/2026-01-01-123456/large_file1.bin
324
+ """,
325
+ )
326
+
327
+ #######################################################################################
328
+ # Compare the backup
329
+
330
+ assert_compare_backup(
331
+ test_case=self,
332
+ src_root=self.src_root,
333
+ backup_root=self.backup_root,
334
+ excludes=('.cache',),
335
+ excpected_last_timestamp='2026-01-01-123456', # Freezed time, see above
336
+ excpected_total_file_count=7,
337
+ excpected_successful_file_count=7,
338
+ excpected_error_count=0,
339
+ )
340
+
341
+ #######################################################################################
342
+ # Backup again with new added files:
343
+
344
+ # New small file with different size and different content:
345
+ (self.src_root / 'small_file_newA.txt').write_text('A new file')
346
+
347
+ # Add a small file whose size already exists, but with different content:
348
+ (self.src_root / 'small_file_newB.txt').write_text('This is file 2')
349
+
350
+ # Bigger file with new size and new content:
351
+ (self.src_root / 'min_sized_file_newA.bin').write_bytes(b'A' * (FileSizeDatabase.MIN_SIZE + 1))
352
+
353
+ # Bigger file with existing size, but different content:
354
+ (self.src_root / 'min_sized_file_newB.bin').write_bytes(b'B' * FileSizeDatabase.MIN_SIZE)
355
+
356
+ # Add a larger than CHUNK_SIZE file with the same size and the same content as large_file1.bin (will be hardlinked):
357
+ (self.src_root / 'large_file2.bin').write_bytes(b'Y' * (SMALLER_TEST_CHUNK_SIZE + 1))
358
+
359
+ #######################################################################################
360
+ # Backup the second time:
361
+
362
+ with (
363
+ patch('PyHardLinkBackup.backup.CHUNK_SIZE', SMALLER_TEST_CHUNK_SIZE),
364
+ CollectOpenFiles(self.temp_path) as collector,
365
+ ):
366
+ redirected_out, result = self.create_backup(time_to_freeze='2026-01-02T12:34:56Z')
367
+
368
+ self.assertEqual(redirected_out.stderr, '')
369
+ self.assertIn('Backup complete', redirected_out.stdout)
370
+ backup_dir = result.backup_dir
371
+ self.assertEqual(
372
+ str(Path(backup_dir).relative_to(self.temp_path)),
373
+ 'backups/source/2026-01-02-123456',
374
+ )
375
+ # The second backup:
376
+ # * /.cache/ -> excluded
377
+ # * min_sized_file1.bin and min_sized_file2.bin -> hardlinked
378
+ with self.assertLogs('PyHardLinkBackup', level=logging.DEBUG):
379
+ assert_fs_tree_overview(
380
+ root=backup_dir,
381
+ expected_overview="""
382
+ path birthtime type nlink size CRC32
383
+ SHA256SUMS <mock> file 1 845 b8aa6635
384
+ file2.txt 12:00:00 file 1 14 8a11514a
385
+ hardlink2file1 12:00:00 file 1 14 8a11514a
386
+ large_file1.bin 12:00:00 hardlink 3 1001 fb3014ff
387
+ large_file2.bin 12:00:00 hardlink 3 1001 fb3014ff
388
+ min_sized_file1.bin 12:00:00 hardlink 4 1000 f0d93de4
389
+ min_sized_file2.bin 12:00:00 hardlink 4 1000 f0d93de4
390
+ min_sized_file_newA.bin 12:00:00 file 1 1001 a48f0e33
391
+ min_sized_file_newB.bin 12:00:00 file 1 1000 7d9c564d
392
+ small_file_newA.txt 12:00:00 file 1 10 76d1acf1
393
+ small_file_newB.txt 12:00:00 file 1 14 131800f0
394
+ subdir/SHA256SUMS <mock> file 1 75 1af5ecc7
395
+ subdir/file.txt 12:00:00 file 1 22 c0167e63
396
+ symlink2file1 12:00:00 symlink 2 14 8a11514a
397
+ """,
398
+ )
399
+ self.assertEqual(
400
+ result,
401
+ BackupResult(
402
+ backup_dir=backup_dir,
403
+ log_file=result.log_file,
404
+ backup_count=12,
405
+ backup_size=6091,
406
+ symlink_files=1,
407
+ hardlinked_files=4,
408
+ hardlinked_size=4002,
409
+ copied_files=7,
410
+ copied_size=2075,
411
+ copied_small_files=5,
412
+ copied_small_size=74,
413
+ error_count=0,
414
+ ),
415
+ redirected_out.stdout,
416
+ )
417
+
418
+ # The FileHashDatabase now also contains the two new min-sized files:
419
+ with self.assertLogs('PyHardLinkBackup', level=logging.DEBUG):
420
+ assert_hash_db_info(
421
+ backup_root=self.backup_root,
422
+ expected="""
423
+ 23/d2/23d2ce40d26211… -> source/2026-01-02-123456/min_sized_file_newA.bin
424
+ 9a/56/9a567077114134… -> source/2026-01-02-123456/min_sized_file_newB.bin
425
+ bb/c4/bbc4de2ca238d1… -> source/2026-01-01-123456/min_sized_file1.bin
426
+ e3/71/e3711d0eacddeb… -> source/2026-01-01-123456/large_file1.bin
427
+ """,
428
+ )
429
+
430
+ self.assertEqual(
431
+ collector.opened_for_read,
432
+ [
433
+ 'r backups/.phlb_test_link',
434
+ 'rb source/subdir/file.txt',
435
+ 'rb source/file2.txt',
436
+ 'rb source/hardlink2file1',
437
+ 'rb source/large_file1.bin',
438
+ 'r backups/.phlb/hash-lookup/e3/71/e3711d0eacddeb105af4ad9b0d63069d759acf32e49712663419e68dc294a94a',
439
+ 'rb source/large_file2.bin',
440
+ 'r backups/.phlb/hash-lookup/e3/71/e3711d0eacddeb105af4ad9b0d63069d759acf32e49712663419e68dc294a94a',
441
+ 'rb source/min_sized_file1.bin',
442
+ 'r backups/.phlb/hash-lookup/bb/c4/bbc4de2ca238d1ec41fb622b75b5cf7d31a6d2ac92405043dd8f8220364fefc8',
443
+ 'rb source/min_sized_file2.bin',
444
+ 'r backups/.phlb/hash-lookup/bb/c4/bbc4de2ca238d1ec41fb622b75b5cf7d31a6d2ac92405043dd8f8220364fefc8',
445
+ 'rb source/min_sized_file_newA.bin',
446
+ 'r backups/.phlb/hash-lookup/23/d2/23d2ce40d26211a9ffe8096fd1f927f2abd094691839d24f88440f7c5168d500',
447
+ 'rb source/min_sized_file_newA.bin',
448
+ 'rb source/min_sized_file_newB.bin',
449
+ 'r backups/.phlb/hash-lookup/9a/56/9a5670771141349931d69d6eb982faa01def544dc17a161ef83b3277fb7c0c3c',
450
+ 'rb source/small_file_newA.txt',
451
+ 'rb source/small_file_newB.txt',
452
+ ],
453
+ )
454
+ self.assertEqual(
455
+ collector.opened_for_write,
456
+ [
457
+ 'w backups/.phlb_test',
458
+ 'a backups/source/2026-01-02-123456-backup.log',
459
+ 'wb backups/source/2026-01-02-123456/subdir/file.txt',
460
+ 'a backups/source/2026-01-02-123456/subdir/SHA256SUMS',
461
+ 'wb backups/source/2026-01-02-123456/file2.txt',
462
+ 'a backups/source/2026-01-02-123456/SHA256SUMS',
463
+ 'wb backups/source/2026-01-02-123456/hardlink2file1',
464
+ 'a backups/source/2026-01-02-123456/SHA256SUMS',
465
+ 'a backups/source/2026-01-02-123456/SHA256SUMS',
466
+ 'a backups/source/2026-01-02-123456/SHA256SUMS',
467
+ 'a backups/source/2026-01-02-123456/SHA256SUMS',
468
+ 'a backups/source/2026-01-02-123456/SHA256SUMS',
469
+ 'wb backups/source/2026-01-02-123456/min_sized_file_newA.bin',
470
+ 'w backups/.phlb/hash-lookup/23/d2/23d2ce40d26211a9ffe8096fd1f927f2abd094691839d24f88440f7c5168d500',
471
+ 'a backups/source/2026-01-02-123456/SHA256SUMS',
472
+ 'wb backups/source/2026-01-02-123456/min_sized_file_newB.bin',
473
+ 'w backups/.phlb/hash-lookup/9a/56/9a5670771141349931d69d6eb982faa01def544dc17a161ef83b3277fb7c0c3c',
474
+ 'a backups/source/2026-01-02-123456/SHA256SUMS',
475
+ 'wb backups/source/2026-01-02-123456/small_file_newA.txt',
476
+ 'a backups/source/2026-01-02-123456/SHA256SUMS',
477
+ 'wb backups/source/2026-01-02-123456/small_file_newB.txt',
478
+ 'a backups/source/2026-01-02-123456/SHA256SUMS',
479
+ 'w backups/source/2026-01-02-123456-summary.txt',
480
+ ],
481
+ )
482
+
483
+ #######################################################################################
484
+ # Compare the backup
485
+
486
+ assert_compare_backup(
487
+ test_case=self,
488
+ src_root=self.src_root,
489
+ backup_root=self.backup_root,
490
+ excludes=('.cache',),
491
+ excpected_last_timestamp='2026-01-02-123456', # Freezed time, see above
492
+ excpected_total_file_count=12,
493
+ excpected_successful_file_count=12,
494
+ excpected_error_count=0,
495
+ )
496
+
497
+ #######################################################################################
498
+ # Don't create broken hardlinks!
499
+
500
+ """DocWrite: README.md ## FileHashDatabase - Missing hardlink target file
501
+ If a hardlink source from a old backup is missing, we cannot create a hardlink to it.
502
+ But it still works to hardlink same files within the current backup.
503
+ """
504
+
505
+ # Let's remove one of the files used for hardlinking from the first backup:
506
+ min_sized_file1_bak_path = self.backup_root / 'source/2026-01-01-123456/min_sized_file1.bin'
507
+ assert_is_file(min_sized_file1_bak_path)
508
+ min_sized_file1_bak_path.unlink()
509
+
510
+ # Backup again:
511
+ redirected_out, result = self.create_backup(time_to_freeze='2026-01-03T12:34:56Z')
512
+
513
+ self.assertEqual(redirected_out.stderr, '')
514
+ self.assertIn('Backup complete', redirected_out.stdout)
515
+ backup_dir = result.backup_dir
516
+
517
+ # Note: min_sized_file1.bin and min_sized_file2.bin are hardlinked,
518
+ # but not with the first backup anymore! So it's only nlink=2 now!
519
+ with self.assertLogs('PyHardLinkBackup', level=logging.DEBUG):
520
+ assert_fs_tree_overview(
521
+ root=backup_dir,
522
+ expected_overview="""
523
+ path birthtime type nlink size CRC32
524
+ SHA256SUMS <mock> file 1 845 b8aa6635
525
+ file2.txt 12:00:00 file 1 14 8a11514a
526
+ hardlink2file1 12:00:00 file 1 14 8a11514a
527
+ large_file1.bin 12:00:00 hardlink 5 1001 fb3014ff
528
+ large_file2.bin 12:00:00 hardlink 5 1001 fb3014ff
529
+ min_sized_file1.bin 12:00:00 hardlink 2 1000 f0d93de4
530
+ min_sized_file2.bin 12:00:00 hardlink 2 1000 f0d93de4
531
+ min_sized_file_newA.bin 12:00:00 hardlink 2 1001 a48f0e33
532
+ min_sized_file_newB.bin 12:00:00 hardlink 2 1000 7d9c564d
533
+ small_file_newA.txt 12:00:00 file 1 10 76d1acf1
534
+ small_file_newB.txt 12:00:00 file 1 14 131800f0
535
+ subdir/SHA256SUMS <mock> file 1 75 1af5ecc7
536
+ subdir/file.txt 12:00:00 file 1 22 c0167e63
537
+ symlink2file1 12:00:00 symlink 2 14 8a11514a
538
+ """,
539
+ )
540
+
541
+ self.assertEqual(
542
+ result,
543
+ BackupResult(
544
+ backup_dir=backup_dir,
545
+ log_file=result.log_file,
546
+ backup_count=12,
547
+ backup_size=6091,
548
+ symlink_files=1,
549
+ hardlinked_files=5,
550
+ hardlinked_size=5003,
551
+ copied_files=6,
552
+ copied_size=1074,
553
+ copied_small_files=5,
554
+ copied_small_size=74,
555
+ error_count=0,
556
+ ),
557
+ )
558
+
559
+ # Note: min_sized_file1.bin is now from the 2026-01-03 backup!
560
+ self.assertEqual(backup_dir.name, '2026-01-03-123456') # Latest backup dir name
561
+ with self.assertLogs('PyHardLinkBackup', level=logging.DEBUG):
562
+ assert_hash_db_info(
563
+ backup_root=self.backup_root,
564
+ expected="""
565
+ 23/d2/23d2ce40d26211… -> source/2026-01-02-123456/min_sized_file_newA.bin
566
+ 9a/56/9a567077114134… -> source/2026-01-02-123456/min_sized_file_newB.bin
567
+ bb/c4/bbc4de2ca238d1… -> source/2026-01-03-123456/min_sized_file1.bin
568
+ e3/71/e3711d0eacddeb… -> source/2026-01-01-123456/large_file1.bin
569
+ """,
570
+ )
571
+
572
+ #######################################################################################
573
+ # Compare the backup
574
+
575
+ assert_compare_backup(
576
+ test_case=self,
577
+ src_root=self.src_root,
578
+ backup_root=self.backup_root,
579
+ excludes=('.cache',),
580
+ excpected_last_timestamp='2026-01-03-123456', # Freezed time, see above
581
+ excpected_total_file_count=12,
582
+ excpected_successful_file_count=12,
583
+ excpected_error_count=0,
584
+ )
585
+
586
+ def test_symlink(self):
587
+ source_file_path = self.src_root / 'source_file.txt'
588
+ source_file_path.write_text('File in the "source" directory.')
589
+
590
+ symlink2source_file_path = self.src_root / 'symlink2source'
591
+ symlink2source_file_path.symlink_to(source_file_path)
592
+ self.assertEqual(symlink2source_file_path.read_text(), 'File in the "source" directory.')
593
+
594
+ outside_file_path = self.temp_path / 'outside_file.txt'
595
+ outside_file_path.write_text('File outside the "source" directory!')
596
+
597
+ symlink2outside_file_path = self.src_root / 'symlink2outside'
598
+ symlink2outside_file_path.symlink_to(outside_file_path)
599
+ self.assertEqual(symlink2outside_file_path.read_text(), 'File outside the "source" directory!')
600
+
601
+ broken_symlink_path = self.src_root / 'broken_symlink'
602
+ broken_symlink_path.symlink_to(self.temp_path / 'not/existing/file.txt')
603
+ broken_symlink_path.is_symlink()
604
+
605
+ #######################################################################################
606
+ # Create first backup:
607
+
608
+ redirected_out, result = self.create_backup(time_to_freeze='2026-01-01T12:34:56Z')
609
+ self.assertEqual(redirected_out.stderr, '')
610
+ self.assertIn('Backup complete', redirected_out.stdout)
611
+ backup_dir1 = result.backup_dir
612
+ self.assertEqual(
613
+ str(Path(backup_dir1).relative_to(self.temp_path)),
614
+ 'backups/source/2026-01-01-123456',
615
+ )
616
+
617
+ with self.assertLogs('PyHardLinkBackup', level=logging.DEBUG):
618
+ """DocWrite: README.md # PyHardLinkBackup - Notes
619
+ A log file is stored in the backup directory. e.g.:
620
+ * `backups/source/2026-01-01-123456-backup.log`
621
+
622
+ A finished backup also creates a summary file. e.g.:
623
+ * `backups/source/2026-01-01-123456-summary.txt`
624
+ """
625
+ assert_fs_tree_overview(
626
+ root=self.temp_path, # The complete overview os source + backup and outside file
627
+ expected_overview="""
628
+ path birthtime type nlink size CRC32
629
+ backups/source/2026-01-01-123456-backup.log <mock> file 1 <mock> <mock>
630
+ backups/source/2026-01-01-123456-summary.txt <mock> file 1 <mock> <mock>
631
+ backups/source/2026-01-01-123456/SHA256SUMS <mock> file 1 82 c03fd60e
632
+ backups/source/2026-01-01-123456/broken_symlink - symlink - - -
633
+ backups/source/2026-01-01-123456/source_file.txt 12:00:00 file 1 31 9309a10c
634
+ backups/source/2026-01-01-123456/symlink2outside 12:00:00 symlink 1 36 24b5bf4c
635
+ backups/source/2026-01-01-123456/symlink2source 12:00:00 symlink 1 31 9309a10c
636
+ outside_file.txt 12:00:00 file 1 36 24b5bf4c
637
+ source/broken_symlink - symlink - - -
638
+ source/source_file.txt 12:00:00 file 1 31 9309a10c
639
+ source/symlink2outside 12:00:00 symlink 1 36 24b5bf4c
640
+ source/symlink2source 12:00:00 symlink 1 31 9309a10c
641
+ """,
642
+ )
643
+
644
+ self.assertEqual(
645
+ result,
646
+ BackupResult(
647
+ backup_dir=backup_dir1,
648
+ log_file=result.log_file,
649
+ backup_count=4,
650
+ backup_size=98,
651
+ symlink_files=3,
652
+ hardlinked_files=0,
653
+ hardlinked_size=0,
654
+ copied_files=1,
655
+ copied_size=31,
656
+ copied_small_files=1,
657
+ copied_small_size=31,
658
+ error_count=0,
659
+ ),
660
+ )
661
+
662
+ """DocWrite: README.md ## backup implementation - Symlinks
663
+ Symlinks are copied as symlinks in the backup."""
664
+ self.assertEqual(
665
+ (backup_dir1 / 'symlink2outside').read_text(),
666
+ 'File outside the "source" directory!',
667
+ )
668
+ self.assertEqual(
669
+ (backup_dir1 / 'symlink2source').read_text(),
670
+ 'File in the "source" directory.',
671
+ )
672
+ self.assertEqual((backup_dir1 / 'symlink2outside').readlink(), outside_file_path)
673
+ self.assertEqual((backup_dir1 / 'symlink2source').readlink(), source_file_path)
674
+
675
+ """DocWrite: README.md ## backup implementation - Symlinks
676
+ Symlinks are not stored in our FileHashDatabase, because they are not considered for hardlinking."""
677
+ with self.assertLogs('PyHardLinkBackup', level=logging.DEBUG):
678
+ assert_hash_db_info(backup_root=self.backup_root, expected='')
679
+
680
+ #######################################################################################
681
+ # Compare the backup
682
+
683
+ assert_compare_backup(
684
+ test_case=self,
685
+ src_root=self.src_root,
686
+ backup_root=self.backup_root,
687
+ std_out_parts=(
688
+ 'Compare completed.',
689
+ 'broken_symlink', # <<< the error we expect
690
+ ),
691
+ excludes=('.cache',),
692
+ excpected_last_timestamp='2026-01-01-123456', # Freezed time, see above
693
+ excpected_total_file_count=3,
694
+ excpected_successful_file_count=3,
695
+ excpected_error_count=0,
696
+ )
697
+
698
+ def test_error_handling(self):
699
+ (self.src_root / 'file1.txt').write_text('File 1')
700
+ (self.src_root / 'file2.txt').write_text('File 2')
701
+ (self.src_root / 'file3.txt').write_text('File 3')
702
+
703
+ def mocked_copy_and_hash(src: Path, dst: Path, progress: DisplayFileTreeProgress, total_size: int):
704
+ file_hash = copy_and_hash(src, dst, NoopProgress(), total_size)
705
+ if src.name == 'file2.txt':
706
+ raise PermissionError('Bam!')
707
+ return file_hash
708
+
709
+ with (
710
+ patch('PyHardLinkBackup.backup.copy_and_hash', mocked_copy_and_hash),
711
+ CollectOpenFiles(self.temp_path) as collector,
712
+ ):
713
+ redirected_out, result = self.create_backup(time_to_freeze='2026-01-01T12:34:56Z')
714
+ self.assertEqual(
715
+ collector.opened_for_read,
716
+ [
717
+ 'r backups/.phlb_test_link',
718
+ 'rb source/file1.txt',
719
+ 'rb source/file2.txt',
720
+ 'rb source/file3.txt',
721
+ ],
722
+ )
723
+ self.assertEqual(
724
+ collector.opened_for_write,
725
+ [
726
+ 'w backups/.phlb_test',
727
+ 'a backups/source/2026-01-01-123456-backup.log',
728
+ 'wb backups/source/2026-01-01-123456/file1.txt',
729
+ 'a backups/source/2026-01-01-123456/SHA256SUMS',
730
+ 'wb backups/source/2026-01-01-123456/file2.txt',
731
+ 'wb backups/source/2026-01-01-123456/file3.txt',
732
+ 'a backups/source/2026-01-01-123456/SHA256SUMS',
733
+ 'w backups/source/2026-01-01-123456-summary.txt',
734
+ ],
735
+ )
736
+ self.assertEqual(redirected_out.stderr, '')
737
+ self.assertIn('Backup complete', redirected_out.stdout)
738
+ self.assertIn('Errors during backup:', redirected_out.stdout)
739
+
740
+ log_file = result.log_file
741
+ assert_is_file(log_file)
742
+ self.assertEqual(str(log_file), f'{self.temp_path}/backups/source/2026-01-01-123456-backup.log')
743
+ logs = log_file.read_text()
744
+ self.assertIn(
745
+ f'Backup {self.src_root / "file2.txt"} PermissionError: Bam!\n',
746
+ logs,
747
+ )
748
+ self.assertIn('\nTraceback (most recent call last):\n', logs)
749
+ self.assertIn(
750
+ f'Removing incomplete file {self.temp_path}/backups/source/2026-01-01-123456/file2.txt'
751
+ ' due to error: Bam!\n',
752
+ logs,
753
+ )
754
+ self.assertEqual(
755
+ result,
756
+ BackupResult(
757
+ backup_dir=result.backup_dir,
758
+ log_file=log_file,
759
+ backup_count=3,
760
+ backup_size=18,
761
+ symlink_files=0,
762
+ hardlinked_files=0,
763
+ hardlinked_size=0,
764
+ copied_files=2,
765
+ copied_size=12,
766
+ copied_small_files=2,
767
+ copied_small_size=12,
768
+ error_count=1,
769
+ ),
770
+ )
771
+ with self.assertLogs('PyHardLinkBackup', level=logging.DEBUG):
772
+ assert_fs_tree_overview(
773
+ root=result.backup_dir,
774
+ expected_overview="""
775
+ path birthtime type nlink size CRC32
776
+ SHA256SUMS <mock> file 1 152 563342a4
777
+ file1.txt 12:00:00 file 1 6 07573806
778
+ file3.txt 12:00:00 file 1 6 e959592a
779
+ """, # file2.txt is missing!
780
+ )
781
+
782
+ #######################################################################################
783
+ # Compare the backup
784
+
785
+ assert_compare_backup(
786
+ test_case=self,
787
+ src_root=self.src_root,
788
+ backup_root=self.backup_root,
789
+ std_out_parts=(
790
+ 'Compare completed.',
791
+ 'file2.txt not found', # <<< the error we expect
792
+ ),
793
+ excludes=('.cache',),
794
+ excpected_last_timestamp='2026-01-01-123456', # Freezed time, see above
795
+ excpected_total_file_count=3,
796
+ excpected_successful_file_count=2,
797
+ excpected_error_count=0,
798
+ )
799
+
800
def test_skip_sha256sums_file(self):
    # A SHA256SUMS file that already exists in the source tree must not be copied:
    # the backup writes its own SHA256SUMS that contains the real hashes.
    (self.src_root / 'SHA256SUMS').write_text('dummy hash content')
    (self.src_root / 'file.txt').write_text('normal file')

    with CollectOpenFiles(self.temp_path) as collector:
        _, result = self.create_backup(
            time_to_freeze='2026-01-01T12:34:56Z',
            log_file_level='debug',  # Skip SHA256SUMS is logged at DEBUG level
        )

    # The SHA256SUMS file from the source tree is never opened for reading:
    self.assertEqual(
        collector.opened_for_read,
        [
            'r backups/.phlb_test_link',
            'rb source/file.txt',
        ],
    )
    self.assertEqual(
        collector.opened_for_write,
        [
            'w backups/.phlb_test',
            'a backups/source/2026-01-01-123456-backup.log',
            'wb backups/source/2026-01-01-123456/file.txt',
            'a backups/source/2026-01-01-123456/SHA256SUMS',
            'w backups/source/2026-01-01-123456-summary.txt',
        ],
    )
    backup_dir = result.backup_dir

    with self.assertLogs('PyHardLinkBackup', level=logging.DEBUG):
        assert_fs_tree_overview(
            root=backup_dir,
            expected_overview="""
                path birthtime type nlink size CRC32
                SHA256SUMS <mock> file 1 75 9570b1e4
                file.txt 12:00:00 file 1 11 e29f436e
            """,
        )

    self.assertEqual(
        (backup_dir / 'SHA256SUMS').read_text(),
        # Not the dummy content -> the real SHA256SUMS file content.
        # 64 hex digits + two-character separator + 'file.txt' + newline = 75 bytes,
        # i.e. exactly the SHA256SUMS size asserted in the overview above.
        '87f644d525b412d6162932d06db1bc06aaa0508374badc861e40ad85b0e01412  file.txt\n',
    )

    self.assertIn(
        'Skip existing SHA256SUMS file',
        result.log_file.read_text(),
    )
848
+
849
def test_large_file_handling(self):
    # Two backups of files that are one byte bigger than the patched CHUNK_SIZE.
    # NOTE(review): presumably CHUNK_SIZE also decides whether a file gets a
    # hash-lookup entry -- confirm against PyHardLinkBackup.backup.
    (self.src_root / 'large_fileA.txt').write_bytes(b'A' * 1001)

    with (
        patch('PyHardLinkBackup.backup.CHUNK_SIZE', 1000),
        CollectOpenFiles(self.temp_path) as collector,
    ):
        _, result = self.create_backup(time_to_freeze='2026-01-11T12:34:56Z')

    self.assertEqual(
        collector.opened_for_read,
        [
            'r backups/.phlb_test_link',
            'rb source/large_fileA.txt',
        ],
    )
    # First backup: the file is copied and a hash-lookup entry is written for it.
    self.assertEqual(
        collector.opened_for_write,
        [
            'w backups/.phlb_test',
            'a backups/source/2026-01-11-123456-backup.log',
            'wb backups/source/2026-01-11-123456/large_fileA.txt',
            'w backups/.phlb/hash-lookup/23/d2/23d2ce40d26211a9ffe8096fd1f927f2abd094691839d24f88440f7c5168d500',
            'a backups/source/2026-01-11-123456/SHA256SUMS',
            'w backups/source/2026-01-11-123456-summary.txt',
        ],
    )
    backup_dir = result.backup_dir

    with self.assertLogs('PyHardLinkBackup', level=logging.DEBUG):
        assert_fs_tree_overview(
            root=backup_dir,
            expected_overview="""
                path birthtime type nlink size CRC32
                SHA256SUMS <mock> file 1 82 c3dd960b
                large_fileA.txt 12:00:00 file 1 1001 a48f0e33
            """,
        )

    self.assertEqual(
        (backup_dir / 'SHA256SUMS').read_text(),
        # 64 hex digits + two-character separator + 'large_fileA.txt' + newline = 82 bytes,
        # i.e. exactly the SHA256SUMS size asserted in the overview above.
        '23d2ce40d26211a9ffe8096fd1f927f2abd094691839d24f88440f7c5168d500  large_fileA.txt\n',
    )

    # Same size, different content -> should be copied again:
    (self.src_root / 'large_fileB.txt').write_bytes(b'B' * 1001)

    with (
        patch('PyHardLinkBackup.backup.CHUNK_SIZE', 1000),
        CollectOpenFiles(self.temp_path) as collector,
    ):
        _, result = self.create_backup(time_to_freeze='2026-02-22T12:34:56Z')

    # large_fileB.txt is opened twice, with a lookup of its hash entry in between.
    self.assertEqual(
        collector.opened_for_read,
        [
            'r backups/.phlb_test_link',
            'rb source/large_fileA.txt',
            'r backups/.phlb/hash-lookup/23/d2/23d2ce40d26211a9ffe8096fd1f927f2abd094691839d24f88440f7c5168d500',
            'rb source/large_fileB.txt',
            'r backups/.phlb/hash-lookup/2a/92/2a925556d3ec9e4258624a324cd9300a9a3d9c86dac6bbbb63071bdb7787afd2',
            'rb source/large_fileB.txt',
        ],
    )
    # No 'wb' entry for large_fileA.txt this time: only large_fileB.txt is written.
    self.assertEqual(
        collector.opened_for_write,
        [
            'w backups/.phlb_test',
            'a backups/source/2026-02-22-123456-backup.log',
            'a backups/source/2026-02-22-123456/SHA256SUMS',
            'wb backups/source/2026-02-22-123456/large_fileB.txt',
            'w backups/.phlb/hash-lookup/2a/92/2a925556d3ec9e4258624a324cd9300a9a3d9c86dac6bbbb63071bdb7787afd2',
            'a backups/source/2026-02-22-123456/SHA256SUMS',
            'w backups/source/2026-02-22-123456-summary.txt',
        ],
    )
    backup_dir = result.backup_dir

    self.assertEqual(
        (backup_dir / 'large_fileA.txt').read_text()[:50],
        'AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA',  # ... AAA
    )
    self.assertEqual(
        (backup_dir / 'large_fileB.txt').read_text()[:50],
        'BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB',  # ... BBB
    )

    log_file_content = result.log_file.read_text()
    self.assertIn(
        f'Hardlink duplicate file: {self.temp_path}/backups/source/2026-02-22-123456/large_fileA.txt'
        f' to {self.temp_path}/backups/source/2026-01-11-123456/large_fileA.txt',
        log_file_content,
    )
    self.assertIn(
        f'Copy unique file: {self.temp_path}/source/large_fileB.txt'
        f' to {self.temp_path}/backups/source/2026-02-22-123456/large_fileB.txt',
        log_file_content,
    )

    # Both backups side by side: large_fileA.txt is one inode with two links.
    with self.assertLogs('PyHardLinkBackup', level=logging.DEBUG):
        assert_fs_tree_overview(
            root=self.backup_root / 'source',
            expected_overview="""
                path birthtime type nlink size CRC32
                2026-01-11-123456-backup.log <mock> file 1 <mock> <mock>
                2026-01-11-123456-summary.txt <mock> file 1 <mock> <mock>
                2026-01-11-123456/SHA256SUMS <mock> file 1 82 c3dd960b
                2026-01-11-123456/large_fileA.txt 12:00:00 hardlink 2 1001 a48f0e33
                2026-02-22-123456-backup.log <mock> file 1 <mock> <mock>
                2026-02-22-123456-summary.txt <mock> file 1 <mock> <mock>
                2026-02-22-123456/SHA256SUMS <mock> file 1 164 3130cbcb
                2026-02-22-123456/large_fileA.txt 12:00:00 hardlink 2 1001 a48f0e33
                2026-02-22-123456/large_fileB.txt 12:00:00 file 1 1001 42c06e4a
            """,
        )
955
+
956
def test_symlinked_directories(self):
    # Back up a tree that contains a symlink to a directory, then delete the
    # link target and back up again to cover the broken-symlink case.
    (self.src_root / 'root_file.txt').write_text('root file')
    sub_dir = self.src_root / 'subdir'
    sub_dir.mkdir()
    (sub_dir / 'file_in_subdir.txt').write_text('subdir file')

    symlinked_subdir = self.src_root / 'symlinked_subdir'
    symlinked_subdir.symlink_to(sub_dir, target_is_directory=True)

    with CollectOpenFiles(self.temp_path) as collector:
        _, result = self.create_backup(
            time_to_freeze='2026-01-01T12:34:56Z',
            log_file_level='debug',
        )
    backup_dir = result.backup_dir

    """DocWrite: README.md ## backup implementation - Symlinks
    A directory symlink will copy into the backup and points to the original subdir."""
    dir_symlink = backup_dir / 'symlinked_subdir'
    self.assertEqual(dir_symlink.readlink(), self.src_root / 'subdir')
    self.assertIs(dir_symlink.is_symlink(), True)
    self.assertIs(dir_symlink.is_dir(), True)

    self.assertIn(
        f'Copy symlink: {self.src_root}/symlinked_subdir to {backup_dir}/symlinked_subdir (is directory: True)\n',
        result.log_file.read_text(),
    )

    with self.assertLogs('PyHardLinkBackup', level=logging.DEBUG):
        assert_fs_tree_overview(
            root=backup_dir,
            expected_overview="""
                path birthtime type nlink size CRC32
                SHA256SUMS <mock> file 1 80 b60fc460
                root_file.txt 12:00:00 file 1 9 b0c0f839
                subdir/SHA256SUMS <mock> file 1 85 df6b59ff
                subdir/file_in_subdir.txt 12:00:00 file 1 11 0ecdaf55
                symlinked_subdir 12:00:00 dir-symlink <mock> - -
            """,
        )

    self.assertEqual(
        (backup_dir / 'SHA256SUMS').read_text(),
        # No entry for directory symlink.
        # 64 hex digits + two-character separator + 'root_file.txt' + newline = 80 bytes,
        # i.e. exactly the SHA256SUMS size asserted in the overview above.
        '3d11afb68c4201f4c42f50009ad7cb3215268c5e17a7e87ac54cf1e5a703635d  root_file.txt\n',
    )

    # The file in subdir is read only once, via its real path:
    self.assertEqual(
        collector.opened_for_read,
        [
            'r backups/.phlb_test_link',
            'rb source/subdir/file_in_subdir.txt',
            'rb source/root_file.txt',
        ],
    )
    self.assertEqual(
        collector.opened_for_write,
        [
            'w backups/.phlb_test',
            'a backups/source/2026-01-01-123456-backup.log',
            'wb backups/source/2026-01-01-123456/subdir/file_in_subdir.txt',
            'a backups/source/2026-01-01-123456/subdir/SHA256SUMS',
            'wb backups/source/2026-01-01-123456/root_file.txt',
            'a backups/source/2026-01-01-123456/SHA256SUMS',
            'w backups/source/2026-01-01-123456-summary.txt',
        ],
    )

    #######################################################################################
    # Compare the backup

    assert_compare_backup(
        test_case=self,
        src_root=self.src_root,
        backup_root=self.backup_root,
        std_out_parts=('Compare completed.',),
        excludes=('.cache',),
        excpected_last_timestamp='2026-01-01-123456',  # Frozen time, see above
        excpected_total_file_count=2,
        excpected_successful_file_count=2,
        excpected_error_count=0,
    )

    #######################################################################################
    # Break the symlinked directory:

    self.assertEqual(symlinked_subdir.readlink(), sub_dir)
    self.assertIs(symlinked_subdir.exists(follow_symlinks=True), True)
    self.assertIs(symlinked_subdir.exists(follow_symlinks=False), True)

    shutil.rmtree(sub_dir)

    # The link itself survives, only its target is gone:
    self.assertEqual(symlinked_subdir.readlink(), sub_dir)
    self.assertIs(symlinked_subdir.exists(follow_symlinks=True), False)  # <<< broken now!
    self.assertIs(symlinked_subdir.exists(follow_symlinks=False), True)

    #######################################################################################
    # Backup again:

    with CollectOpenFiles(self.temp_path) as collector:
        _, result = self.create_backup(
            time_to_freeze='2026-01-23T12:34:56Z',
            log_file_level='debug',
        )
    backup_dir = result.backup_dir

    # It's still a directory symlink in the backup and points to the original subdir:
    dir_symlink = backup_dir / 'symlinked_subdir'
    self.assertEqual(dir_symlink.readlink(), self.src_root / 'subdir')
    self.assertIs(dir_symlink.is_symlink(), True)

    """DocWrite: README.md ## backup implementation - Symlinks
    If the directory symlink is broken, we still create the symlink in the backup,
    pointing to the original target. But in this case it's a file symlink."""
    self.assertIs(dir_symlink.is_dir(), False)
    self.assertIn(
        f'Copy symlink: {self.src_root}/symlinked_subdir to {backup_dir}/symlinked_subdir (is directory: False)\n',
        result.log_file.read_text(),
    )

    with self.assertLogs('PyHardLinkBackup', level=logging.DEBUG):
        assert_fs_tree_overview(
            root=backup_dir,
            expected_overview="""
                path birthtime type nlink size CRC32
                SHA256SUMS <mock> file 1 80 b60fc460
                root_file.txt 12:00:00 file 1 9 b0c0f839
                symlinked_subdir - symlink - - -
            """,
        )

    self.assertEqual(
        (backup_dir / 'SHA256SUMS').read_text(),
        # No entry for directory symlink.
        # 64 hex digits + two-character separator + 'root_file.txt' + newline = 80 bytes,
        # i.e. exactly the SHA256SUMS size asserted in the overview above.
        '3d11afb68c4201f4c42f50009ad7cb3215268c5e17a7e87ac54cf1e5a703635d  root_file.txt\n',
    )

    self.assertEqual(
        collector.opened_for_read,
        ['r backups/.phlb_test_link', 'rb source/root_file.txt'],
    )
    self.assertEqual(
        collector.opened_for_write,
        [
            'w backups/.phlb_test',
            'a backups/source/2026-01-23-123456-backup.log',
            'wb backups/source/2026-01-23-123456/root_file.txt',
            'a backups/source/2026-01-23-123456/SHA256SUMS',
            'w backups/source/2026-01-23-123456-summary.txt',
        ],
    )

    #######################################################################################
    # Compare the backup

    assert_compare_backup(
        test_case=self,
        src_root=self.src_root,
        backup_root=self.backup_root,
        std_out_parts=(
            'Compare completed.',
            f'Broken symlink {self.src_root}/symlinked_subdir',
        ),
        excludes=('.cache',),
        excpected_last_timestamp='2026-01-23-123456',  # Frozen time, see above
        excpected_total_file_count=1,
        excpected_successful_file_count=1,
        excpected_error_count=0,
    )
1125
+
1126
def test_backup_name(self):
    """DocWrite: README.md # PyHardLinkBackup - Backup Naming
    The backup name is optional.
    If not provided, the name of the source directory is used.
    """
    (self.src_root / 'file.txt').touch()

    # One row per backup run, executed in this order:
    # (frozen time, backup name, expected backup dir relative to the temp path)
    runs = (
        ('2026-01-01T12:34:56Z', None, 'backups/source/2026-01-01-123456'),
        ('2026-01-01T12:34:56Z', 'My-Backup', 'backups/My-Backup/2026-01-01-123456'),
        ('2026-12-24T00:12:34Z', 'My-Backup', 'backups/My-Backup/2026-12-24-001234'),
    )
    for frozen_time, name, expected_rel_dir in runs:
        _stdout, outcome = self.create_backup(time_to_freeze=frozen_time, backup_name=name)
        rel_dir = outcome.backup_dir.relative_to(self.temp_path)
        self.assertEqual(str(rel_dir), expected_rel_dir)

    # All three runs end up next to each other below the backup root:
    with self.assertLogs('PyHardLinkBackup', level=logging.DEBUG):
        assert_fs_tree_overview(
            root=self.backup_root,
            expected_overview="""
                path birthtime type nlink size CRC32
                My-Backup/2026-01-01-123456-backup.log <mock> file 1 <mock> <mock>
                My-Backup/2026-01-01-123456-summary.txt <mock> file 1 <mock> <mock>
                My-Backup/2026-01-01-123456/SHA256SUMS <mock> file 1 75 43d11c57
                My-Backup/2026-01-01-123456/file.txt 12:00:00 file 1 0 00000000
                My-Backup/2026-12-24-001234-backup.log <mock> file 1 <mock> <mock>
                My-Backup/2026-12-24-001234-summary.txt <mock> file 1 <mock> <mock>
                My-Backup/2026-12-24-001234/SHA256SUMS <mock> file 1 75 43d11c57
                My-Backup/2026-12-24-001234/file.txt 12:00:00 file 1 0 00000000
                source/2026-01-01-123456-backup.log <mock> file 1 <mock> <mock>
                source/2026-01-01-123456-summary.txt <mock> file 1 <mock> <mock>
                source/2026-01-01-123456/SHA256SUMS <mock> file 1 75 43d11c57
                source/2026-01-01-123456/file.txt 12:00:00 file 1 0 00000000
            """,
        )