PyHardLinkBackup 1.8.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- PyHardLinkBackup/__init__.py +7 -0
- PyHardLinkBackup/__main__.py +10 -0
- PyHardLinkBackup/backup.py +297 -0
- PyHardLinkBackup/cli_app/__init__.py +41 -0
- PyHardLinkBackup/cli_app/phlb.py +136 -0
- PyHardLinkBackup/cli_dev/__init__.py +70 -0
- PyHardLinkBackup/cli_dev/__main__.py +10 -0
- PyHardLinkBackup/cli_dev/benchmark.py +138 -0
- PyHardLinkBackup/cli_dev/code_style.py +12 -0
- PyHardLinkBackup/cli_dev/debugging.py +47 -0
- PyHardLinkBackup/cli_dev/packaging.py +62 -0
- PyHardLinkBackup/cli_dev/shell_completion.py +23 -0
- PyHardLinkBackup/cli_dev/testing.py +52 -0
- PyHardLinkBackup/cli_dev/update_readme_history.py +33 -0
- PyHardLinkBackup/compare_backup.py +259 -0
- PyHardLinkBackup/constants.py +18 -0
- PyHardLinkBackup/logging_setup.py +124 -0
- PyHardLinkBackup/rebuild_databases.py +217 -0
- PyHardLinkBackup/tests/__init__.py +36 -0
- PyHardLinkBackup/tests/test_backup.py +1167 -0
- PyHardLinkBackup/tests/test_compare_backup.py +167 -0
- PyHardLinkBackup/tests/test_doc_write.py +26 -0
- PyHardLinkBackup/tests/test_doctests.py +10 -0
- PyHardLinkBackup/tests/test_project_setup.py +46 -0
- PyHardLinkBackup/tests/test_readme.py +75 -0
- PyHardLinkBackup/tests/test_readme_history.py +9 -0
- PyHardLinkBackup/tests/test_rebuild_database.py +266 -0
- PyHardLinkBackup/utilities/__init__.py +0 -0
- PyHardLinkBackup/utilities/file_hash_database.py +62 -0
- PyHardLinkBackup/utilities/file_size_database.py +46 -0
- PyHardLinkBackup/utilities/filesystem.py +257 -0
- PyHardLinkBackup/utilities/humanize.py +39 -0
- PyHardLinkBackup/utilities/rich_utils.py +237 -0
- PyHardLinkBackup/utilities/sha256sums.py +61 -0
- PyHardLinkBackup/utilities/tee.py +40 -0
- PyHardLinkBackup/utilities/tests/__init__.py +0 -0
- PyHardLinkBackup/utilities/tests/test_file_hash_database.py +153 -0
- PyHardLinkBackup/utilities/tests/test_file_size_database.py +151 -0
- PyHardLinkBackup/utilities/tests/test_filesystem.py +167 -0
- PyHardLinkBackup/utilities/tests/unittest_utilities.py +78 -0
- PyHardLinkBackup/utilities/tyro_cli_shared_args.py +29 -0
- pyhardlinkbackup-1.8.1.dist-info/METADATA +700 -0
- pyhardlinkbackup-1.8.1.dist-info/RECORD +45 -0
- pyhardlinkbackup-1.8.1.dist-info/WHEEL +4 -0
- pyhardlinkbackup-1.8.1.dist-info/entry_points.txt +2 -0
|
@@ -0,0 +1,153 @@
|
|
|
1
|
+
import logging
|
|
2
|
+
import tempfile
|
|
3
|
+
import textwrap
|
|
4
|
+
from pathlib import Path
|
|
5
|
+
|
|
6
|
+
from bx_py_utils.path import assert_is_dir
|
|
7
|
+
from bx_py_utils.test_utils.assertion import assert_text_equal
|
|
8
|
+
from bx_py_utils.test_utils.log_utils import NoLogs
|
|
9
|
+
from cli_base.cli_tools.test_utils.base_testcases import BaseTestCase
|
|
10
|
+
|
|
11
|
+
from PyHardLinkBackup.utilities.file_hash_database import FileHashDatabase, HashAlreadyExistsError
|
|
12
|
+
from PyHardLinkBackup.utilities.filesystem import iter_scandir_files
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
class TemporaryFileHashDatabase(tempfile.TemporaryDirectory):
    """
    Temporary directory whose context manager yields a FileHashDatabase.

    The resolved temporary directory is used as backup root
    and a fresh ".phlb" sub directory is created in it as config directory.
    """

    def __enter__(self) -> FileHashDatabase:
        root = Path(super().__enter__()).resolve()

        conf_dir = root / '.phlb'
        conf_dir.mkdir()

        return FileHashDatabase(backup_root=root, phlb_conf_dir=conf_dir)
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
def get_hash_db_filenames(hash_db: FileHashDatabase) -> list[str]:
    """
    Return the sorted, relative paths of all files below ``hash_db.base_path``.

    The paths always use "/" as separator, so they can be compared against
    literals like "12/34/12345678abcdef" independent of the platform.
    """
    base_path = hash_db.base_path
    # NOTE: The log output of the directory scan is *not* suppressed here.
    return sorted(
        Path(entry.path).relative_to(base_path).as_posix()
        for entry in iter_scandir_files(
            path=base_path,
            one_file_system=False,
            src_device_id=None,
            excludes=set(),
        )
    )
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
def get_hash_db_info(backup_root: Path) -> str:
    """
    Build a sorted, human readable dump of the hash database below `backup_root`.

    Every file in ".phlb/hash-lookup" becomes one line of the form:
        <first 20 characters of the relative entry path>… -> <content of the entry file>
    """
    lookup_dir = backup_root / '.phlb' / 'hash-lookup'
    assert_is_dir(lookup_dir)

    def describe(entry) -> str:
        entry_path = Path(entry.path)
        shortened = str(entry_path.relative_to(lookup_dir))[:20]
        return f'{shortened}… -> {entry_path.read_text()}'

    # NOTE(review): 'XY' looks like a placeholder logger name - confirm what should be silenced here.
    with NoLogs(logger_name='XY'):
        info_lines = sorted(
            describe(entry)
            for entry in iter_scandir_files(
                path=lookup_dir,
                one_file_system=False,
                src_device_id=None,
                excludes=set(),
            )
        )
    return '\n'.join(info_lines)
|
|
57
|
+
|
|
58
|
+
|
|
59
|
+
def assert_hash_db_info(backup_root: Path, expected: str):
    """
    Compare the hash database dump of `backup_root` with `expected`.

    `expected` may be an indented, multi line string: it is dedented and stripped
    before the comparison. The current dump is part of the failure message.
    """
    wanted = textwrap.dedent(expected).strip()
    current = get_hash_db_info(backup_root)
    failure_message = f'FileHashDatabase info does not match as expected.\n\n{current}\n\n'
    assert_text_equal(current, wanted, msg=failure_message)
|
|
67
|
+
|
|
68
|
+
|
|
69
|
+
class FileHashDatabaseTestCase(BaseTestCase):
    """Scenario test for FileHashDatabase, executed against a temporary backup root."""

    def test_happy_path(self):
        # One long scenario: path layout -> add/lookup -> second instance on the same
        # directory -> overwrite protection -> entries pointing to a missing file.
        with TemporaryFileHashDatabase() as hash_db:
            self.assertIsInstance(hash_db, FileHashDatabase)

            backup_root_path = hash_db.backup_root
            assert_is_dir(backup_root_path)

            # The first two character pairs of the hash are expected as nested directory names:
            test_path = hash_db._get_hash_path('12345678abcdef')
            self.assertEqual(test_path, hash_db.base_path / '12' / '34' / '12345678abcdef')

            file_a_path = backup_root_path / 'rel/path/to/file-A'
            file_a_path.parent.mkdir(parents=True, exist_ok=True)
            file_a_path.touch()

            # The hash is unknown before the assignment and known afterwards:
            self.assertIs(hash_db.get('12345678abcdef'), None)
            self.assertIs('12345678abcdef' in hash_db, False)
            hash_db['12345678abcdef'] = file_a_path
            self.assertEqual(hash_db.get('12345678abcdef'), file_a_path)
            self.assertIs('12345678abcdef' in hash_db, True)
            # The directory scan is expected to log on the "PyHardLinkBackup" logger:
            with self.assertLogs('PyHardLinkBackup', level=logging.DEBUG):
                self.assertEqual(
                    get_hash_db_filenames(hash_db),
                    ['12/34/12345678abcdef'],
                )

            ########################################################################################
            # Another instance using the same directory:

            another_hash_db = FileHashDatabase(
                backup_root=hash_db.backup_root,
                phlb_conf_dir=hash_db.base_path.parent,
            )
            # The entry stored via the first instance is visible, an unknown hash is not:
            self.assertEqual(another_hash_db.get('12345678abcdef'), file_a_path)
            self.assertIs(another_hash_db.get('12abcd345678abcdef'), None)

            file_b_path = backup_root_path / 'rel/path/to/file-B'
            file_b_path.parent.mkdir(parents=True, exist_ok=True)
            file_b_path.touch()

            another_hash_db['12abcd345678abcdef'] = file_b_path
            self.assertEqual(another_hash_db.get('12abcd345678abcdef'), file_b_path)
            with self.assertLogs('PyHardLinkBackup', level=logging.DEBUG):
                self.assertEqual(
                    get_hash_db_filenames(another_hash_db),
                    [
                        '12/34/12345678abcdef',
                        '12/ab/12abcd345678abcdef',
                    ],
                )

            # Both entries are expected to contain the file path relative to the backup root:
            with self.assertLogs('PyHardLinkBackup', level=logging.DEBUG):
                assert_hash_db_info(
                    backup_root=hash_db.backup_root,
                    expected="""
                        12/34/12345678abcdef… -> rel/path/to/file-A
                        12/ab/12abcd345678ab… -> rel/path/to/file-B
                    """,
                )

            ########################################################################################
            # Deny "overwrite" of existing hash:

            with self.assertRaises(HashAlreadyExistsError):
                hash_db['12abcd345678abcdef'] = 'foo/bar/baz'  # already exists!

            ########################################################################################
            # Don't use stale entries pointing to missing files:

            self.assertEqual(hash_db.get('12345678abcdef'), file_a_path)
            file_a_path.unlink()

            """DocWrite: README.md ## FileHashDatabase - Missing hardlink target file
            We check if the hardlink source file still exists. If not, we remove the hash entry from the database.
            A warning is logged in this case."""
            with self.assertLogs(level=logging.WARNING) as logs:
                self.assertIs(hash_db.get('12345678abcdef'), None)
            self.assertIn('Hash database entry found, but file does not exist', ''.join(logs.output))
            # Only the entry of the still existing file-B is expected to be left:
            with self.assertLogs('PyHardLinkBackup', level=logging.DEBUG):
                assert_hash_db_info(
                    backup_root=hash_db.backup_root,
                    expected="""
                        12/ab/12abcd345678ab… -> rel/path/to/file-B
                    """,
                )
|
|
@@ -0,0 +1,151 @@
|
|
|
1
|
+
import logging
|
|
2
|
+
import tempfile
|
|
3
|
+
from collections.abc import Iterable
|
|
4
|
+
from pathlib import Path
|
|
5
|
+
|
|
6
|
+
from bx_py_utils.test_utils.log_utils import NoLogs
|
|
7
|
+
from cli_base.cli_tools.test_utils.base_testcases import BaseTestCase
|
|
8
|
+
|
|
9
|
+
from PyHardLinkBackup.utilities.file_size_database import FileSizeDatabase
|
|
10
|
+
from PyHardLinkBackup.utilities.filesystem import iter_scandir_files
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
class TemporaryFileSizeDatabase(tempfile.TemporaryDirectory):
    """
    Temporary directory whose context manager yields a FileSizeDatabase.

    A fresh ".phlb" sub directory of the resolved temporary directory
    is created and used as config directory of the database.
    """

    def __enter__(self) -> FileSizeDatabase:
        root = Path(super().__enter__()).resolve()

        conf_dir = root / '.phlb'
        conf_dir.mkdir()

        return FileSizeDatabase(phlb_conf_dir=conf_dir)
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
def get_size_db_filenames(size_db: FileSizeDatabase) -> Iterable[str]:
    """
    Return the sorted, relative paths of all files below ``size_db.base_path``.

    The paths always use "/" as separator, so they can be compared against
    literals like "12/34/1234" independent of the platform.
    """
    base_path = size_db.base_path
    # NOTE: The log output of the directory scan is *not* suppressed here.
    return sorted(
        Path(entry.path).relative_to(base_path).as_posix()
        for entry in iter_scandir_files(
            path=base_path,
            one_file_system=False,
            src_device_id=None,
            excludes=set(),
        )
    )
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
def get_sizes(size_db: FileSizeDatabase) -> Iterable[int]:
    """
    Return all sizes stored in `size_db` as a numerically sorted list.

    Every file name below ``size_db.base_path`` is interpreted as an integer.
    Log messages of the filesystem helper are suppressed while scanning.
    """
    with NoLogs('PyHardLinkBackup.utilities.filesystem'):
        scanned_entries = iter_scandir_files(
            path=size_db.base_path,
            one_file_system=False,
            src_device_id=None,
            excludes=set(),
        )
        sizes = [int(entry.name) for entry in scanned_entries]
        sizes.sort()
        return sizes
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
class FileSizeDatabaseTestCase(BaseTestCase):
    """Scenario test for FileSizeDatabase, executed against a temporary config directory."""

    def test_happy_path(self):
        # One long scenario: path layout -> add/contains -> second instance on the same
        # directory -> sizes sharing directories -> minimum size -> final on-disk state.
        with TemporaryFileSizeDatabase() as size_db:
            self.assertIsInstance(size_db, FileSizeDatabase)

            # The first two digit pairs of the size are expected as nested directory names:
            test_path1 = size_db._get_size_path(1234)
            self.assertEqual(test_path1, size_db.base_path / '12' / '34' / '1234')

            test_path2 = size_db._get_size_path(567890)
            self.assertEqual(test_path2, size_db.base_path / '56' / '78' / '567890')

            # Nothing is stored yet:
            self.assertNotIn(1234, size_db)
            self.assertNotIn(567890, size_db)

            size_db.add(1234)
            self.assertIn(1234, size_db)
            self.assertNotIn(567890, size_db)

            size_db.add(567890)
            self.assertIn(1234, size_db)
            self.assertIn(567890, size_db)

            # get_sizes() silences the filesystem logger itself,
            # get_size_db_filenames() does not suppress any logging:
            with self.assertLogs('PyHardLinkBackup', level=logging.DEBUG):
                self.assertEqual(get_sizes(size_db), [1234, 567890])
                self.assertEqual(
                    get_size_db_filenames(size_db),
                    [
                        '12/34/1234',
                        '56/78/567890',
                    ],
                )

            ########################################################################################
            # Another instance using the same directory:

            another_size_db = FileSizeDatabase(phlb_conf_dir=size_db.base_path.parent)
            with self.assertLogs('PyHardLinkBackup', level=logging.DEBUG):
                self.assertEqual(get_sizes(another_size_db), [1234, 567890])
                self.assertEqual(
                    get_size_db_filenames(another_size_db),
                    [
                        '12/34/1234',
                        '56/78/567890',
                    ],
                )

            ########################################################################################
            # "Share" directories:

            # These sizes start with "12" (two of them with "1234"),
            # so they end up in already existing directories - see final state below.
            for size in (123400001111, 123400002222, 128800003333, 129900004444):
                self.assertNotIn(size, size_db)
                size_db.add(size)
                self.assertIn(size, size_db)

            ########################################################################################
            # Min size is 1000 bytes:

            """DocWrite: README.md ## FileSizeDatabase - minimum file size
            The minimum file size that can be stored in the FileSizeDatabase is 1000 bytes.
            This is because no padding is made for sizes below 1000 bytes, which would
            break the directory structure.
            """
            self.assertEqual(FileSizeDatabase.MIN_SIZE, 1000)
            """DocWrite: README.md ## FileSizeDatabase - minimum file size
            The idea is, that it's more efficient to backup small files directly, instead of
            checking for duplicates via hardlinks. Therefore, small files below this size
            are not tracked in the FileSizeDatabase.
            """

            # Every access with a size below the minimum is expected to fail:
            with self.assertRaises(AssertionError):
                size_db._get_size_path(999)
            with self.assertRaises(AssertionError):
                size_db.add(999)
            with self.assertRaises(AssertionError):
                # Bare expression on purpose: the membership test itself should raise.
                999 in size_db

            ########################################################################################
            # Check final state:

            with self.assertLogs('PyHardLinkBackup', level=logging.DEBUG):
                # File names are sorted as strings...
                self.assertEqual(
                    get_size_db_filenames(size_db),
                    [
                        '12/34/1234',
                        '12/34/123400001111',
                        '12/34/123400002222',
                        '12/88/128800003333',
                        '12/99/129900004444',
                        '56/78/567890',
                    ],
                )
                # ...while the sizes are sorted numerically:
                self.assertEqual(
                    get_sizes(size_db),
                    [
                        1234,
                        567890,
                        123400001111,
                        123400002222,
                        128800003333,
                        129900004444,
                    ],
                )
|
|
@@ -0,0 +1,167 @@
|
|
|
1
|
+
import hashlib
|
|
2
|
+
import logging
|
|
3
|
+
import os
|
|
4
|
+
import tempfile
|
|
5
|
+
from pathlib import Path
|
|
6
|
+
from unittest.mock import patch
|
|
7
|
+
|
|
8
|
+
from cli_base.cli_tools.test_utils.base_testcases import BaseTestCase
|
|
9
|
+
|
|
10
|
+
from PyHardLinkBackup.constants import HASH_ALGO
|
|
11
|
+
from PyHardLinkBackup.utilities.filesystem import (
|
|
12
|
+
copy_and_hash,
|
|
13
|
+
hash_file,
|
|
14
|
+
iter_scandir_files,
|
|
15
|
+
read_and_hash_file,
|
|
16
|
+
supports_hardlinks,
|
|
17
|
+
)
|
|
18
|
+
from PyHardLinkBackup.utilities.rich_utils import NoopProgress
|
|
19
|
+
from PyHardLinkBackup.utilities.tests.unittest_utilities import TemporaryDirectoryPath
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
class TestHashFile(BaseTestCase):
    """Tests for the helper functions of PyHardLinkBackup.utilities.filesystem."""

    maxDiff = None

    def test_hash_file(self):
        """hash_file() returns the hex digest of the file content and logs it."""
        payload = b'test content'
        expected_hash = '6ae8a75555209fd6c44157c0aed8016e763ff435a19cf186f76863140143ff72'

        # Sanity check: the pinned digest belongs to the configured hash algorithm.
        self.assertEqual(hashlib.new(HASH_ALGO, payload).hexdigest(), expected_hash)

        with tempfile.NamedTemporaryFile() as tmp_file:
            file_path = Path(tmp_file.name)
            file_path.write_bytes(payload)

            with self.assertLogs(level='INFO') as captured:
                result = hash_file(file_path, progress=NoopProgress(), total_size=123)
            self.assertEqual(result, expected_hash)
            self.assertIn(' sha256 hash: 6ae8a7', ''.join(captured.output))

    def test_copy_and_hash(self):
        """copy_and_hash() copies the content, returns its hex digest and logs the copy."""
        payload = b'test content'
        expected_hash = '6ae8a75555209fd6c44157c0aed8016e763ff435a19cf186f76863140143ff72'

        with TemporaryDirectoryPath() as root:
            source = root / 'source.txt'
            destination = root / 'dest.txt'
            source.write_bytes(payload)

            with self.assertLogs(level='INFO') as captured:
                result = copy_and_hash(src=source, dst=destination, progress=NoopProgress(), total_size=123)

            self.assertEqual(destination.read_bytes(), payload)
            self.assertEqual(result, expected_hash)
            self.assertIn(' backup to ', ''.join(captured.output))

    def test_read_and_hash_file(self):
        """read_and_hash_file() returns the content together with its hex digest."""
        payload = b'test content'
        expected_hash = '6ae8a75555209fd6c44157c0aed8016e763ff435a19cf186f76863140143ff72'

        with tempfile.NamedTemporaryFile() as tmp_file:
            file_path = Path(tmp_file.name)
            file_path.write_bytes(payload)

            with self.assertLogs(level='INFO') as captured:
                data, result = read_and_hash_file(file_path)
            self.assertEqual(data, payload)
            self.assertEqual(result, expected_hash)
            self.assertIn(' sha256 hash: 6ae8a7', ''.join(captured.output))

    def test_iter_scandir_files(self):
        """iter_scandir_files() yields files and symlinks, but skips excluded directories."""
        with TemporaryDirectoryPath() as root:
            file1 = root / 'file1.txt'
            file2 = root / 'file2.txt'
            file1.write_bytes(b'content1')
            file2.write_bytes(b'content2')

            sub_directory = root / 'subdir'
            sub_directory.mkdir()
            (sub_directory / 'file3.txt').write_bytes(b'content3')

            # A symlink to the sub directory:
            (root / 'symlink_dir2subdir').symlink_to(sub_directory, target_is_directory=True)

            # A symlink to file1.txt and a hardlink to file2.txt:
            (root / 'symlink_to_file1.txt').symlink_to(file1)
            os.link(file2, root / 'hardlink_to_file2.txt')

            # A directory that is excluded from the scan:
            excluded_directory = root / '__pycache__'
            excluded_directory.mkdir()
            (excluded_directory / 'BAM.txt').write_bytes(b'foobar')

            # A symlink pointing to a not existing target:
            (root / 'broken_symlink').symlink_to(root / 'not/existing/file.txt')

            with self.assertLogs(level='DEBUG') as captured:
                entries = list(
                    iter_scandir_files(
                        path=root,
                        one_file_system=False,
                        src_device_id=None,
                        excludes={'__pycache__'},
                    )
                )

            relative_names = sorted(Path(entry.path).relative_to(root).as_posix() for entry in entries)

            self.assertEqual(
                relative_names,
                [
                    'broken_symlink',
                    'file1.txt',
                    'file2.txt',
                    'hardlink_to_file2.txt',
                    'subdir/file3.txt',
                    'symlink_dir2subdir',
                    'symlink_to_file1.txt',
                ],
            )
            log_text = ''.join(captured.output)
            self.assertIn('Scanning directory ', log_text)
            self.assertIn('Excluding directory ', log_text)

    def test_one_file_system(self):
        """With one_file_system=True directories with another device ID are skipped."""

        def collect(base_path, *, one_file_system, src_device_id):
            # Scan base_path and return (sorted relative file names, captured log text).
            with self.assertLogs(level='DEBUG') as captured:
                entries = list(
                    iter_scandir_files(
                        path=base_path,
                        one_file_system=one_file_system,
                        src_device_id=src_device_id,
                        excludes=set(),
                    )
                )
            names = sorted(Path(entry.path).relative_to(base_path).as_posix() for entry in entries)
            return names, '\n'.join(captured.output)

        with TemporaryDirectoryPath() as root:
            (root / 'file1.txt').touch()
            sub_directory = root / 'subdir'
            sub_directory.mkdir()
            (sub_directory / 'file2.txt').touch()

            names, log_text = collect(root, one_file_system=False, src_device_id=None)
            self.assertEqual(names, ['file1.txt', 'subdir/file2.txt'])
            self.assertIn('Scanning directory ', log_text)
            self.assertNotIn('Skipping', log_text)

            # A fake source device ID never matches the real one of the sub directory:
            names, log_text = collect(root, one_file_system=True, src_device_id='FooBar')
            self.assertEqual(names, ['file1.txt'])
            self.assertIn('Scanning directory ', log_text)
            self.assertIn('Skipping directory ', log_text)
            self.assertIn('different device ID', log_text)
            self.assertIn('(src device ID: FooBar)', log_text)

    def test_supports_hardlinks(self):
        """supports_hardlinks() reports the result of a hardlink test and logs it."""
        with TemporaryDirectoryPath() as root:
            with self.assertLogs(level=logging.INFO) as captured:
                self.assertTrue(supports_hardlinks(root))
            self.assertEqual(
                ''.join(captured.output),
                f'INFO:PyHardLinkBackup.utilities.filesystem:Hardlink support in {root}: True',
            )

            # Simulate a filesystem without hardlink support:
            with self.assertLogs(level=logging.ERROR) as captured:
                with patch('PyHardLinkBackup.utilities.filesystem.os.link', side_effect=OSError):
                    self.assertFalse(supports_hardlinks(root))
            error_text = ''.join(captured.output)
            self.assertIn(f'Hardlink test failed in {root}:', error_text)
            self.assertIn('OSError', error_text)

        with self.assertLogs(level=logging.DEBUG):
            with self.assertRaises(NotADirectoryError):
                supports_hardlinks(Path('/not/existing/directory'))
|
|
@@ -0,0 +1,78 @@
|
|
|
1
|
+
import pathlib
|
|
2
|
+
import tempfile
|
|
3
|
+
from pathlib import Path
|
|
4
|
+
from unittest.mock import patch
|
|
5
|
+
|
|
6
|
+
from bx_py_utils.test_utils.context_managers import MassContextManager
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
class TemporaryDirectoryPath(tempfile.TemporaryDirectory):
    """
    A tempfile.TemporaryDirectory whose context manager
    yields the directory as resolved Path instance instead of a string.
    """

    def __enter__(self) -> Path:
        # The parent returns the directory name as string.
        directory_name = super().__enter__()
        return Path(directory_name).resolve()

    def __exit__(self, exc_type, exc_val, exc_tb):
        # Cleanup is completely left to the parent class.
        result = super().__exit__(exc_type, exc_val, exc_tb)
        return result
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
class PyHardLinkBackupTestCaseMixin:
    """
    Test case mixin that provides a fresh temporary directory for every test:

    * self.temp_path: the temporary directory itself
    * self.src_root: existing, empty directory "source" inside of it
    * self.backup_root: existing, empty directory "backups" inside of it
    """

    def setUp(self):
        super().setUp()

        # Entered manually, because the directory must live until tearDown():
        self.temp_path_cm = TemporaryDirectoryPath()
        self.temp_path = self.temp_path_cm.__enter__()

        base_path = self.temp_path
        self.src_root = base_path / 'source'
        self.backup_root = base_path / 'backups'

        for directory in (self.src_root, self.backup_root):
            directory.mkdir()

    def tearDown(self):
        super().tearDown()
        # Leave the context manager without an exception to remove the directory:
        self.temp_path_cm.__exit__(None, None, None)
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
class CollectOpenFiles(MassContextManager):
    """
    Context manager to collect opened files for read and write within a given root directory.
    Raises an AssertionError if the same file is opened multiple times for read or write.

    Works only for standard open() and pathlib.Path.open().

    The collected entries have the form "<mode> <path relative to root>". They are stored
    in `opened_for_read` and `opened_for_write` in the order the files were opened.
    """

    def __init__(self, root: Path):
        """
        :param root: Directory the opened files are expected in. Opening a file outside
            of it fails with the ValueError raised by Path.relative_to().
        """
        self.root = root

        # Keep a reference to the real open() before it gets patched:
        self.origin_open = open
        self.mocks = (
            patch('builtins.open', self.open_mock),
            patch.object(pathlib.Path, 'open', self.make_path_open_wrapper()),
        )
        self.opened_for_read = []
        self.opened_for_write = []

    @staticmethod
    def _record(entries: list, mode: str, rel_path: Path, kind: str) -> None:
        """
        Append "<mode> <rel_path>" to `entries`.

        :raises AssertionError: if `rel_path` was already recorded in `entries` (with any mode).
        """
        # Entries are stored together with their mode, so compare only the path part.
        # A mode string never contains a space: everything after the first space is the path.
        wanted = str(rel_path)
        if any(entry.partition(' ')[2] == wanted for entry in entries):
            raise AssertionError(f'File {rel_path} already opened for {kind}')
        entries.append(f'{mode} {rel_path}')

    def open_mock(self, file, mode='r', *args, **kwargs):
        """
        Replacement for open(): record the access and delegate to the real open().

        A mode containing "r" without "+" counts as read access,
        every mode containing one of "w", "a", "x" or "+" as write access.

        :raises AssertionError: if the file was already opened for the same kind of access.
        :raises NotImplementedError: if the mode is neither a read nor a write mode.
        """
        rel_path = Path(file).resolve().relative_to(self.root)

        if 'r' in mode and '+' not in mode:
            self._record(self.opened_for_read, mode, rel_path, 'read')
        elif any(m in mode for m in 'wax+'):
            self._record(self.opened_for_write, mode, rel_path, 'write')
        else:
            raise NotImplementedError(f'Unsupported file open {mode=}')

        return self.origin_open(file, mode, *args, **kwargs)

    def make_path_open_wrapper(self):
        """
        Return a function that is usable as replacement for pathlib.Path.open().

        It receives the Path instance as first argument and forwards everything to open_mock().
        """

        def open_wrapper(path_self, *args, **kwargs):
            return self.open_mock(path_self, *args, **kwargs)

        return open_wrapper
|
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
from typing import Annotated
|
|
2
|
+
|
|
3
|
+
import tyro
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
# Shared tyro argument types: each one combines the plain value type
# with the help text of the CLI argument.

# Any number of directory names that should be excluded from the backup:
TyroExcludeDirectoriesArgType = Annotated[
    tuple[str, ...],
    tyro.conf.arg(
        help='List of directories to exclude from backup.',
    ),
]
# Presumably the default value for arguments of the type above - the usage is not visible here.
DEFAULT_EXCLUDE_DIRECTORIES = ('__pycache__', '.cache', '.temp', '.tmp', '.tox', '.nox')

# Flag to stay on one filesystem:
TyroOneFileSystemArgType = Annotated[
    bool,
    tyro.conf.arg(
        help='Do not cross filesystem boundaries.',
    ),
]

# Optional backup name (None is allowed as value):
TyroBackupNameArgType = Annotated[
    str | None,
    tyro.conf.arg(
        help=(
            'Optional name for the backup (used to create a subdirectory in the backup destination).'
            ' If not provided, the name of the source directory is used.'
        ),
    ),
]
|