PyHardLinkBackup 1.5.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (42) hide show
  1. PyHardLinkBackup/__init__.py +7 -0
  2. PyHardLinkBackup/__main__.py +10 -0
  3. PyHardLinkBackup/backup.py +261 -0
  4. PyHardLinkBackup/cli_app/__init__.py +41 -0
  5. PyHardLinkBackup/cli_app/phlb.py +123 -0
  6. PyHardLinkBackup/cli_dev/__init__.py +70 -0
  7. PyHardLinkBackup/cli_dev/benchmark.py +138 -0
  8. PyHardLinkBackup/cli_dev/code_style.py +12 -0
  9. PyHardLinkBackup/cli_dev/packaging.py +65 -0
  10. PyHardLinkBackup/cli_dev/shell_completion.py +23 -0
  11. PyHardLinkBackup/cli_dev/testing.py +52 -0
  12. PyHardLinkBackup/cli_dev/update_readme_history.py +33 -0
  13. PyHardLinkBackup/compare_backup.py +212 -0
  14. PyHardLinkBackup/constants.py +16 -0
  15. PyHardLinkBackup/logging_setup.py +124 -0
  16. PyHardLinkBackup/rebuild_databases.py +176 -0
  17. PyHardLinkBackup/tests/__init__.py +36 -0
  18. PyHardLinkBackup/tests/test_backup.py +628 -0
  19. PyHardLinkBackup/tests/test_compare_backup.py +86 -0
  20. PyHardLinkBackup/tests/test_doc_write.py +26 -0
  21. PyHardLinkBackup/tests/test_doctests.py +10 -0
  22. PyHardLinkBackup/tests/test_project_setup.py +46 -0
  23. PyHardLinkBackup/tests/test_readme.py +75 -0
  24. PyHardLinkBackup/tests/test_readme_history.py +9 -0
  25. PyHardLinkBackup/tests/test_rebuild_database.py +224 -0
  26. PyHardLinkBackup/utilities/__init__.py +0 -0
  27. PyHardLinkBackup/utilities/file_hash_database.py +62 -0
  28. PyHardLinkBackup/utilities/file_size_database.py +46 -0
  29. PyHardLinkBackup/utilities/filesystem.py +158 -0
  30. PyHardLinkBackup/utilities/humanize.py +39 -0
  31. PyHardLinkBackup/utilities/rich_utils.py +99 -0
  32. PyHardLinkBackup/utilities/sha256sums.py +61 -0
  33. PyHardLinkBackup/utilities/tee.py +40 -0
  34. PyHardLinkBackup/utilities/tests/__init__.py +0 -0
  35. PyHardLinkBackup/utilities/tests/test_file_hash_database.py +143 -0
  36. PyHardLinkBackup/utilities/tests/test_file_size_database.py +138 -0
  37. PyHardLinkBackup/utilities/tests/test_filesystem.py +126 -0
  38. PyHardLinkBackup/utilities/tyro_cli_shared_args.py +12 -0
  39. pyhardlinkbackup-1.5.0.dist-info/METADATA +600 -0
  40. pyhardlinkbackup-1.5.0.dist-info/RECORD +42 -0
  41. pyhardlinkbackup-1.5.0.dist-info/WHEEL +4 -0
  42. pyhardlinkbackup-1.5.0.dist-info/entry_points.txt +3 -0
@@ -0,0 +1,628 @@
1
+ import datetime
2
+ import logging
3
+ import os
4
+ import tempfile
5
+ import textwrap
6
+ import unittest
7
+ import zlib
8
+ from collections.abc import Iterable
9
+ from pathlib import Path
10
+ from unittest.mock import patch
11
+
12
+ from bx_py_utils.path import assert_is_file
13
+ from bx_py_utils.test_utils.assertion import assert_text_equal
14
+ from bx_py_utils.test_utils.datetime import parse_dt
15
+ from bx_py_utils.test_utils.log_utils import NoLogs
16
+ from bx_py_utils.test_utils.redirect import RedirectOut
17
+ from cli_base.cli_tools.test_utils.base_testcases import OutputMustCapturedTestCaseMixin
18
+ from freezegun import freeze_time
19
+ from tabulate import tabulate
20
+
21
+ from PyHardLinkBackup.backup import BackupResult, backup_tree
22
+ from PyHardLinkBackup.constants import CHUNK_SIZE
23
+ from PyHardLinkBackup.logging_setup import DEFAULT_CONSOLE_LOG_LEVEL, DEFAULT_LOG_FILE_LEVEL, LoggingManager
24
+ from PyHardLinkBackup.utilities.file_size_database import FileSizeDatabase
25
+ from PyHardLinkBackup.utilities.filesystem import copy_and_hash, iter_scandir_files
26
+ from PyHardLinkBackup.utilities.tests.test_file_hash_database import assert_hash_db_info
27
+
28
+
29
class SortedIterScandirFiles:
    """
    Deterministic drop-in replacement for iter_scandir_files(), used via patch() in the tests.

    os.scandir() gives no ordering guarantee, which would make the backup results flaky.
    An instance can be used as a context manager and iterated;
    iteration delegates to iter_scandir_files() and yields its entries ordered by entry name.
    """

    def __init__(self, path: Path, excludes: set):
        # Only remember the arguments: the real scan is deferred until iteration starts.
        self.excludes = excludes
        self.path = path

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        # Nothing to release here; returning None lets exceptions propagate.
        return None

    def __iter__(self) -> Iterable[os.DirEntry]:
        # Materialize all entries first, because sorting needs the complete listing:
        entries = list(iter_scandir_files(self.path, self.excludes))
        entries.sort(key=lambda entry: entry.name)
        yield from entries
48
+
49
+
50
def set_file_times(path: Path, dt: datetime.datetime):
    """
    Set access and modification time of every file found below `path` to `dt`.

    A timezone-aware `dt` is converted to UTC and its tzinfo is dropped.
    The naive value is then passed to timestamp(), which treats naive datetimes as local time.
    NOTE(review): this looks intentional, so that rendering the file time with a naive
    fromtimestamp() shows the same wall clock time on every machine -- confirm.
    """
    if dt.tzinfo is None:
        naive_dt = dt
    else:
        naive_dt = dt.astimezone(datetime.timezone.utc).replace(tzinfo=None)

    timestamp = naive_dt.timestamp()
    atime_and_mtime = (timestamp, timestamp)

    # Suppress the log output of the directory scan (root logger):
    with NoLogs(logger_name=''):
        for entry in iter_scandir_files(path, excludes=set()):
            os.utime(entry.path, atime_and_mtime)
58
+
59
+
60
def _fs_tree_overview(root: Path) -> str:
    """
    Build a plain text table describing all files that iter_scandir_files() finds below `root`.

    One row per file with the columns: path (relative to `root`), birthtime (HH:MM:SS),
    type (symlink / hardlink / file), nlink, size and the CRC32 of the content.
    Rows are sorted, so the result does not depend on the scan order.

    Volatile values are replaced by '<mock>':
     * size and CRC32 of '*-backup.log' and '*-summary.txt' files
     * birthtime of these log files and of 'SHA256SUMS' files

    If stat() raises FileNotFoundError (e.g. for a broken symlink), the columns
    birthtime, nlink, size and CRC32 contain '-'.
    """
    lines = []
    for entry in iter_scandir_files(root, excludes=set()):
        file_path = Path(entry.path)
        try:
            file_stat = entry.stat()
        except FileNotFoundError:
            crc32 = '-'
            nlink = '-'
            size = '-'
            birthtime = '-'
        else:
            is_log_file = entry.name.endswith(('-backup.log', '-summary.txt'))
            if is_log_file:
                # flaky content!
                crc32 = '<mock>'
                size = '<mock>'
            else:
                crc32 = f'{zlib.crc32(file_path.read_bytes()):08x}'
                size = file_stat.st_size

            nlink = file_stat.st_nlink

            if entry.name == 'SHA256SUMS' or is_log_file:
                birthtime = '<mock>'
            else:
                # Not every platform provides st_birthtime: fall back to the modification time.
                birthtime = getattr(file_stat, 'st_birthtime', file_stat.st_mtime)
                birthtime = datetime.datetime.fromtimestamp(birthtime).strftime('%H:%M:%S')

        if entry.is_symlink():
            file_type = 'symlink'
        elif isinstance(nlink, int) and nlink > 1:
            # The isinstance() guard avoids a TypeError from comparing the '-' placeholder
            # with an int when stat() failed for a entry that is not a symlink.
            file_type = 'hardlink'
        else:
            file_type = 'file'

        lines.append(
            [
                str(file_path.relative_to(root)),
                birthtime,
                file_type,
                nlink,
                size,
                crc32,
            ]
        )

    return tabulate(
        sorted(lines),
        headers=['path', 'birthtime', 'type', 'nlink', 'size', 'CRC32'],
        tablefmt='plain',
    )
110
+
111
+
112
def assert_fs_tree_overview(root: Path, expected_overview: str):
    """
    Assert that the file tree below `root` renders to the `expected_overview` table.

    `expected_overview` may be an indented triple-quoted string:
    it is dedented and stripped before the comparison.
    On a mismatch the actual overview is included in the failure message.
    """
    wanted = textwrap.dedent(expected_overview).strip()
    got = _fs_tree_overview(root)
    failure_message = f'Filesystem tree overview does not match expected overview.\n\n{got}\n\n'
    assert_text_equal(got, wanted, msg=failure_message)
120
+
121
+
122
class BackupTreeTestCase(
    OutputMustCapturedTestCaseMixin,
    unittest.TestCase,
):
    """
    Integration tests for backup_tree(): every test builds a source tree in a temporary
    directory, runs one or more backups with frozen time and checks the returned
    BackupResult, the resulting file tree and the FileHashDatabase content.
    """

    def test_happy_path(self):
        """Three consecutive backups: initial, with added files, and after a hardlink target was removed."""
        with tempfile.TemporaryDirectory() as temp_dir:
            temp_path = Path(temp_dir).resolve()

            src_root = temp_path / 'source'
            backup_root = temp_path / 'backup'

            src_root.mkdir()
            backup_root.mkdir()

            file1_path = src_root / 'file2.txt'
            file1_path.write_text('This is file 1')

            (src_root / 'symlink2file1').symlink_to(file1_path)
            os.link(file1_path, src_root / 'hardlink2file1')

            sub_dir = src_root / 'subdir'
            sub_dir.mkdir()
            (sub_dir / 'file.txt').write_text('This is file in subdir')

            # Only files with at least MIN_SIZE bytes will be considered for hardlinking:
            (src_root / 'min_sized_file1.bin').write_bytes(b'X' * FileSizeDatabase.MIN_SIZE)

            # Same content and big enough to be considered for hardlinking:
            (src_root / 'min_sized_file2.bin').write_bytes(b'X' * FileSizeDatabase.MIN_SIZE)

            # Larger than CHUNK_SIZE file will be handled differently:
            (src_root / 'large_file1.bin').write_bytes(b'Y' * (CHUNK_SIZE + 1))

            excluded_dir = src_root / '.cache'
            excluded_dir.mkdir()
            (excluded_dir / 'tempfile.tmp').write_text('Temporary file that should be excluded')

            # FIXME: freezegun doesn't handle this, see: https://github.com/spulec/freezegun/issues/392
            # Set modification times to a fixed time for easier testing:
            set_file_times(src_root, dt=parse_dt('2026-01-01T12:00:00+0000'))

            #######################################################################################
            # Create first backup:

            with (
                patch('PyHardLinkBackup.backup.iter_scandir_files', SortedIterScandirFiles),
                freeze_time('2026-01-01T12:34:56Z', auto_tick_seconds=0),
                RedirectOut() as redirected_out,
            ):
                result = backup_tree(
                    src_root=src_root,
                    backup_root=backup_root,
                    excludes=('.cache',),
                    log_manager=LoggingManager(
                        console_level='info',
                        file_level=DEFAULT_LOG_FILE_LEVEL,
                    ),
                )
            self.assertEqual(redirected_out.stderr, '')
            self.assertIn('Backup complete', redirected_out.stdout)
            backup_dir = result.backup_dir
            self.assertEqual(
                str(Path(backup_dir).relative_to(temp_path)),
                'backup/source/2026-01-01-123456',
            )
            log_file = result.log_file
            self.assertEqual(
                str(Path(log_file).relative_to(temp_path)),
                'backup/source/2026-01-01-123456-backup.log',
            )
            self.assertEqual(
                result,
                BackupResult(
                    backup_dir=backup_dir,
                    log_file=log_file,
                    backup_count=7,
                    backup_size=67110929,
                    symlink_files=1,
                    hardlinked_files=1,
                    hardlinked_size=1000,
                    copied_files=5,
                    copied_size=67109915,
                    copied_small_files=3,
                    copied_small_size=50,
                    error_count=0,
                ),
                redirected_out.stdout,
            )

            # The sources:
            with self.assertLogs('PyHardLinkBackup', level=logging.DEBUG):
                assert_fs_tree_overview(
                    root=src_root,
                    expected_overview="""
                        path birthtime type nlink size CRC32
                        .cache/tempfile.tmp 12:00:00 file 1 38 41d7a2c9
                        file2.txt 12:00:00 hardlink 2 14 8a11514a
                        hardlink2file1 12:00:00 hardlink 2 14 8a11514a
                        large_file1.bin 12:00:00 file 1 67108865 9671eaac
                        min_sized_file1.bin 12:00:00 file 1 1000 f0d93de4
                        min_sized_file2.bin 12:00:00 file 1 1000 f0d93de4
                        subdir/file.txt 12:00:00 file 1 22 c0167e63
                        symlink2file1 12:00:00 symlink 2 14 8a11514a
                    """,
                )
            # The backup:
            # * /.cache/ -> excluded
            # * min_sized_file1.bin and min_sized_file2.bin -> hardlinked
            with self.assertLogs('PyHardLinkBackup', level=logging.DEBUG):
                assert_fs_tree_overview(
                    root=backup_dir,
                    expected_overview="""
                        path birthtime type nlink size CRC32
                        SHA256SUMS <mock> file 1 411 b02da51e
                        file2.txt 12:00:00 file 1 14 8a11514a
                        hardlink2file1 12:00:00 file 1 14 8a11514a
                        large_file1.bin 12:00:00 file 1 67108865 9671eaac
                        min_sized_file1.bin 12:00:00 hardlink 2 1000 f0d93de4
                        min_sized_file2.bin 12:00:00 hardlink 2 1000 f0d93de4
                        subdir/SHA256SUMS <mock> file 1 75 1af5ecc7
                        subdir/file.txt 12:00:00 file 1 22 c0167e63
                        symlink2file1 12:00:00 symlink 2 14 8a11514a
                    """,
                )

            # Let's check our FileHashDatabase:
            with self.assertLogs('PyHardLinkBackup', level=logging.DEBUG):
                assert_hash_db_info(
                    backup_root=backup_root,
                    expected="""
                        bb/c4/bbc4de2ca238d1… -> source/2026-01-01-123456/min_sized_file1.bin
                        e6/37/e6374ac11d9049… -> source/2026-01-01-123456/large_file1.bin
                    """,
                )

            #######################################################################################
            # Backup again with new added files:

            # New small file with different size and different content:
            (src_root / 'small_file_newA.txt').write_text('A new file')

            # Add small file that size exists, but has different content:
            (src_root / 'small_file_newB.txt').write_text('This is file 2')

            # Bigger file with new size and new content:
            (src_root / 'min_sized_file_newA.bin').write_bytes(b'A' * (FileSizeDatabase.MIN_SIZE + 1))

            # Bigger file with existing size, but different content:
            (src_root / 'min_sized_file_newB.bin').write_bytes(b'B' * FileSizeDatabase.MIN_SIZE)

            # Add a larger than CHUNK_SIZE file with the same size and the same content
            # as large_file1.bin, so it will be hardlinked:
            (src_root / 'large_file2.bin').write_bytes(b'Y' * (CHUNK_SIZE + 1))

            # FIXME: freezegun doesn't handle this, see: https://github.com/spulec/freezegun/issues/392
            # Set modification times to a fixed time for easier testing:
            set_file_times(src_root, dt=parse_dt('2026-01-01T12:00:00+0000'))

            with (
                patch('PyHardLinkBackup.backup.iter_scandir_files', SortedIterScandirFiles),
                freeze_time('2026-01-02T12:34:56Z', auto_tick_seconds=0),
                RedirectOut() as redirected_out,
            ):
                result = backup_tree(
                    src_root=src_root,
                    backup_root=backup_root,
                    excludes=('.cache',),
                    log_manager=LoggingManager(
                        console_level='info',
                        file_level=DEFAULT_LOG_FILE_LEVEL,
                    ),
                )
            self.assertEqual(redirected_out.stderr, '')
            self.assertIn('Backup complete', redirected_out.stdout)
            backup_dir = result.backup_dir
            self.assertEqual(
                str(Path(backup_dir).relative_to(temp_path)),
                'backup/source/2026-01-02-123456',
            )
            # The second backup:
            # * /.cache/ -> excluded
            # * min_sized_file1.bin and min_sized_file2.bin -> hardlinked
            with self.assertLogs('PyHardLinkBackup', level=logging.DEBUG):
                assert_fs_tree_overview(
                    root=backup_dir,
                    expected_overview="""
                        path birthtime type nlink size CRC32
                        SHA256SUMS <mock> file 1 845 6596856a
                        file2.txt 12:00:00 file 1 14 8a11514a
                        hardlink2file1 12:00:00 file 1 14 8a11514a
                        large_file1.bin 12:00:00 hardlink 3 67108865 9671eaac
                        large_file2.bin 12:00:00 hardlink 3 67108865 9671eaac
                        min_sized_file1.bin 12:00:00 hardlink 4 1000 f0d93de4
                        min_sized_file2.bin 12:00:00 hardlink 4 1000 f0d93de4
                        min_sized_file_newA.bin 12:00:00 file 1 1001 a48f0e33
                        min_sized_file_newB.bin 12:00:00 file 1 1000 7d9c564d
                        small_file_newA.txt 12:00:00 file 1 10 76d1acf1
                        small_file_newB.txt 12:00:00 file 1 14 131800f0
                        subdir/SHA256SUMS <mock> file 1 75 1af5ecc7
                        subdir/file.txt 12:00:00 file 1 22 c0167e63
                        symlink2file1 12:00:00 symlink 2 14 8a11514a
                    """,
                )
            self.assertEqual(
                result,
                BackupResult(
                    backup_dir=backup_dir,
                    log_file=result.log_file,
                    backup_count=12,
                    backup_size=134221819,
                    symlink_files=1,
                    hardlinked_files=4,
                    hardlinked_size=134219730,
                    copied_files=7,
                    copied_size=2075,
                    copied_small_files=5,
                    copied_small_size=74,
                    error_count=0,
                ),
                redirected_out.stdout,
            )

            # The FileHashDatabase keeps the old entries and gets the two new min-sized files:
            with self.assertLogs('PyHardLinkBackup', level=logging.DEBUG):
                assert_hash_db_info(
                    backup_root=backup_root,
                    expected="""
                        23/d2/23d2ce40d26211… -> source/2026-01-02-123456/min_sized_file_newA.bin
                        9a/56/9a567077114134… -> source/2026-01-02-123456/min_sized_file_newB.bin
                        bb/c4/bbc4de2ca238d1… -> source/2026-01-01-123456/min_sized_file1.bin
                        e6/37/e6374ac11d9049… -> source/2026-01-01-123456/large_file1.bin
                    """,
                )

            #######################################################################################
            # Don't create broken hardlinks!

            """DocWrite: README.md ## FileHashDatabase - Missing hardlink target file
            If a hardlink source from a old backup is missing, we cannot create a hardlink to it.
            But it still works to hardlink same files within the current backup.
            """

            # Let's remove one of the files used for hardlinking from the first backup:
            min_sized_file1_bak_path = backup_root / 'source/2026-01-01-123456/min_sized_file1.bin'
            assert_is_file(min_sized_file1_bak_path)
            min_sized_file1_bak_path.unlink()

            # Backup again:
            with (
                patch('PyHardLinkBackup.backup.iter_scandir_files', SortedIterScandirFiles),
                freeze_time('2026-01-03T12:34:56Z', auto_tick_seconds=0),
                RedirectOut() as redirected_out,
            ):
                result = backup_tree(
                    src_root=src_root,
                    backup_root=backup_root,
                    excludes=('.cache',),
                    log_manager=LoggingManager(
                        console_level=DEFAULT_CONSOLE_LOG_LEVEL,
                        file_level=DEFAULT_LOG_FILE_LEVEL,
                    ),
                )
            self.assertEqual(redirected_out.stderr, '')
            self.assertIn('Backup complete', redirected_out.stdout)
            backup_dir = result.backup_dir

            # Note: min_sized_file1.bin and min_sized_file2.bin are hardlinked,
            # but not with the first backup anymore! So it's only nlink=2 now!
            with self.assertLogs('PyHardLinkBackup', level=logging.DEBUG):
                assert_fs_tree_overview(
                    root=backup_dir,
                    expected_overview="""
                        path birthtime type nlink size CRC32
                        SHA256SUMS <mock> file 1 845 6596856a
                        file2.txt 12:00:00 file 1 14 8a11514a
                        hardlink2file1 12:00:00 file 1 14 8a11514a
                        large_file1.bin 12:00:00 hardlink 5 67108865 9671eaac
                        large_file2.bin 12:00:00 hardlink 5 67108865 9671eaac
                        min_sized_file1.bin 12:00:00 hardlink 2 1000 f0d93de4
                        min_sized_file2.bin 12:00:00 hardlink 2 1000 f0d93de4
                        min_sized_file_newA.bin 12:00:00 hardlink 2 1001 a48f0e33
                        min_sized_file_newB.bin 12:00:00 hardlink 2 1000 7d9c564d
                        small_file_newA.txt 12:00:00 file 1 10 76d1acf1
                        small_file_newB.txt 12:00:00 file 1 14 131800f0
                        subdir/SHA256SUMS <mock> file 1 75 1af5ecc7
                        subdir/file.txt 12:00:00 file 1 22 c0167e63
                        symlink2file1 12:00:00 symlink 2 14 8a11514a
                    """,
                )

            self.assertEqual(
                result,
                BackupResult(
                    backup_dir=backup_dir,
                    log_file=result.log_file,
                    backup_count=12,
                    backup_size=134221819,
                    symlink_files=1,
                    hardlinked_files=5,
                    hardlinked_size=134220731,
                    copied_files=6,
                    copied_size=1074,
                    copied_small_files=5,
                    copied_small_size=74,
                    error_count=0,
                ),
            )

            # Note: min_sized_file1.bin is now from the 2026-01-03 backup!
            self.assertEqual(backup_dir.name, '2026-01-03-123456')  # Latest backup dir name
            with self.assertLogs('PyHardLinkBackup', level=logging.DEBUG):
                assert_hash_db_info(
                    backup_root=backup_root,
                    expected="""
                        23/d2/23d2ce40d26211… -> source/2026-01-02-123456/min_sized_file_newA.bin
                        9a/56/9a567077114134… -> source/2026-01-02-123456/min_sized_file_newB.bin
                        bb/c4/bbc4de2ca238d1… -> source/2026-01-03-123456/min_sized_file1.bin
                        e6/37/e6374ac11d9049… -> source/2026-01-01-123456/large_file1.bin
                    """,
                )

    def test_symlink(self):
        """Symlinks (inside the source, outside the source and broken) are backed up as symlinks."""
        with tempfile.TemporaryDirectory() as temp_dir:
            temp_path = Path(temp_dir).resolve()

            src_root = temp_path / 'src'
            backup_root = temp_path / 'bak'

            src_root.mkdir()
            backup_root.mkdir()

            source_file_path = src_root / 'source_file.txt'
            source_file_path.write_text('File in the "source" directory.')

            symlink2source_file_path = src_root / 'symlink2source'
            symlink2source_file_path.symlink_to(source_file_path)
            self.assertEqual(symlink2source_file_path.read_text(), 'File in the "source" directory.')

            outside_file_path = temp_path / 'outside_file.txt'
            outside_file_path.write_text('File outside the "source" directory!')

            symlink2outside_file_path = src_root / 'symlink2outside'
            symlink2outside_file_path.symlink_to(outside_file_path)
            self.assertEqual(symlink2outside_file_path.read_text(), 'File outside the "source" directory!')

            # FIXME: freezegun doesn't handle this, see: https://github.com/spulec/freezegun/issues/392
            # Set modification times to a fixed time for easier testing:
            set_file_times(src_root, dt=parse_dt('2026-01-01T12:00:00+0000'))

            # NOTE(review): the broken symlink is presumably created *after* set_file_times(),
            # because os.utime() follows symlinks and would fail on the missing target -- confirm.
            broken_symlink_path = src_root / 'broken_symlink'
            broken_symlink_path.symlink_to(temp_path / 'not/existing/file.txt')
            # Precondition: is_symlink() is also true for a symlink with a missing target.
            self.assertTrue(broken_symlink_path.is_symlink())

            #######################################################################################
            # Create first backup:

            with (
                freeze_time('2026-01-01T12:34:56Z', auto_tick_seconds=0),
                RedirectOut() as redirected_out,
            ):
                result = backup_tree(
                    src_root=src_root,
                    backup_root=backup_root,
                    excludes=(),
                    log_manager=LoggingManager(
                        console_level=DEFAULT_CONSOLE_LOG_LEVEL,
                        file_level=DEFAULT_LOG_FILE_LEVEL,
                    ),
                )
            self.assertEqual(redirected_out.stderr, '')
            self.assertIn('Backup complete', redirected_out.stdout)
            backup_dir1 = result.backup_dir
            self.assertEqual(
                str(Path(backup_dir1).relative_to(temp_path)),
                'bak/src/2026-01-01-123456',
            )

            with self.assertLogs('PyHardLinkBackup', level=logging.DEBUG):
                """DocWrite: README.md # PyHardLinkBackup - Notes
                A log file is stored in the backup directory. e.g.:
                * `bak/src/2026-01-01-123456-backup.log`

                A finished backup also creates a summary file. e.g.:
                * `bak/src/2026-01-01-123456-summary.txt`
                """
                assert_fs_tree_overview(
                    root=temp_path,  # The complete overview of source + backup and outside file
                    expected_overview="""
                        path birthtime type nlink size CRC32
                        bak/src/2026-01-01-123456-backup.log <mock> file 1 <mock> <mock>
                        bak/src/2026-01-01-123456-summary.txt <mock> file 1 <mock> <mock>
                        bak/src/2026-01-01-123456/SHA256SUMS <mock> file 1 82 c03fd60e
                        bak/src/2026-01-01-123456/broken_symlink - symlink - - -
                        bak/src/2026-01-01-123456/source_file.txt 12:00:00 file 1 31 9309a10c
                        bak/src/2026-01-01-123456/symlink2outside 12:00:00 symlink 1 36 24b5bf4c
                        bak/src/2026-01-01-123456/symlink2source 12:00:00 symlink 1 31 9309a10c
                        outside_file.txt 12:00:00 file 1 36 24b5bf4c
                        src/broken_symlink - symlink - - -
                        src/source_file.txt 12:00:00 file 1 31 9309a10c
                        src/symlink2outside 12:00:00 symlink 1 36 24b5bf4c
                        src/symlink2source 12:00:00 symlink 1 31 9309a10c
                    """,
                )

            self.assertEqual(
                result,
                BackupResult(
                    backup_dir=backup_dir1,
                    log_file=result.log_file,
                    backup_count=4,
                    backup_size=98,
                    symlink_files=3,
                    hardlinked_files=0,
                    hardlinked_size=0,
                    copied_files=1,
                    copied_size=31,
                    copied_small_files=1,
                    copied_small_size=31,
                    error_count=0,
                ),
            )

            """DocWrite: README.md ## backup implementation - Symlinks
            Symlinks are copied as symlinks in the backup."""
            self.assertEqual(
                (backup_dir1 / 'symlink2outside').read_text(),
                'File outside the "source" directory!',
            )
            self.assertEqual(
                (backup_dir1 / 'symlink2source').read_text(),
                'File in the "source" directory.',
            )
            self.assertEqual((backup_dir1 / 'symlink2outside').readlink(), outside_file_path)
            self.assertEqual((backup_dir1 / 'symlink2source').readlink(), source_file_path)

            """DocWrite: README.md ## backup implementation - Symlinks
            Symlinks are not stored in our FileHashDatabase, because they are not considered for hardlinking."""
            with self.assertLogs('PyHardLinkBackup', level=logging.DEBUG):
                assert_hash_db_info(backup_root=backup_root, expected='')

    def test_error_handling(self):
        """A failing copy of one file is logged and counted, while the other files are still backed up."""
        with tempfile.TemporaryDirectory() as temp_dir:
            temp_path = Path(temp_dir).resolve()

            src_root = temp_path / 'source'
            backup_root = temp_path / 'backup'

            src_root.mkdir()
            backup_root.mkdir()

            (src_root / 'file1.txt').write_text('File 1')
            (src_root / 'file2.txt').write_text('File 2')
            (src_root / 'file3.txt').write_text('File 3')

            # Set modification times to a fixed time for easier testing:
            set_file_times(src_root, dt=parse_dt('2026-01-01T12:00:00+0000'))

            def mocked_copy_and_hash(src: Path, dst: Path):
                # Simulate an unreadable file for exactly one source file:
                if src.name == 'file2.txt':
                    raise PermissionError('Bam!')
                else:
                    return copy_and_hash(src, dst)

            with (
                patch('PyHardLinkBackup.backup.iter_scandir_files', SortedIterScandirFiles),
                patch('PyHardLinkBackup.backup.copy_and_hash', mocked_copy_and_hash),
                freeze_time('2026-01-01T12:34:56Z', auto_tick_seconds=0),
                RedirectOut() as redirected_out,
            ):
                result = backup_tree(
                    src_root=src_root,
                    backup_root=backup_root,
                    excludes=('.cache',),
                    log_manager=LoggingManager(
                        console_level=DEFAULT_CONSOLE_LOG_LEVEL,
                        file_level=DEFAULT_LOG_FILE_LEVEL,
                    ),
                )
            self.assertEqual(redirected_out.stderr, '')
            self.assertIn('Backup complete', redirected_out.stdout)
            self.assertIn('Errors during backup:', redirected_out.stdout)

            log_file = result.log_file
            assert_is_file(log_file)
            self.assertEqual(str(log_file), f'{temp_path}/backup/source/2026-01-01-123456-backup.log')
            logs = log_file.read_text()
            self.assertIn(
                f'Backup {src_root / "file2.txt"} PermissionError: Bam!\n',
                logs,
            )
            self.assertIn('\nTraceback (most recent call last):\n', logs)
            self.assertEqual(
                result,
                BackupResult(
                    backup_dir=result.backup_dir,
                    log_file=log_file,
                    backup_count=3,
                    backup_size=18,
                    symlink_files=0,
                    hardlinked_files=0,
                    hardlinked_size=0,
                    copied_files=2,
                    copied_size=12,
                    copied_small_files=2,
                    copied_small_size=12,
                    error_count=1,
                ),
            )