PyHardLinkBackup 1.5.0 (pyhardlinkbackup-1.5.0-py3-none-any.whl)
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- PyHardLinkBackup/__init__.py +7 -0
- PyHardLinkBackup/__main__.py +10 -0
- PyHardLinkBackup/backup.py +261 -0
- PyHardLinkBackup/cli_app/__init__.py +41 -0
- PyHardLinkBackup/cli_app/phlb.py +123 -0
- PyHardLinkBackup/cli_dev/__init__.py +70 -0
- PyHardLinkBackup/cli_dev/benchmark.py +138 -0
- PyHardLinkBackup/cli_dev/code_style.py +12 -0
- PyHardLinkBackup/cli_dev/packaging.py +65 -0
- PyHardLinkBackup/cli_dev/shell_completion.py +23 -0
- PyHardLinkBackup/cli_dev/testing.py +52 -0
- PyHardLinkBackup/cli_dev/update_readme_history.py +33 -0
- PyHardLinkBackup/compare_backup.py +212 -0
- PyHardLinkBackup/constants.py +16 -0
- PyHardLinkBackup/logging_setup.py +124 -0
- PyHardLinkBackup/rebuild_databases.py +176 -0
- PyHardLinkBackup/tests/__init__.py +36 -0
- PyHardLinkBackup/tests/test_backup.py +628 -0
- PyHardLinkBackup/tests/test_compare_backup.py +86 -0
- PyHardLinkBackup/tests/test_doc_write.py +26 -0
- PyHardLinkBackup/tests/test_doctests.py +10 -0
- PyHardLinkBackup/tests/test_project_setup.py +46 -0
- PyHardLinkBackup/tests/test_readme.py +75 -0
- PyHardLinkBackup/tests/test_readme_history.py +9 -0
- PyHardLinkBackup/tests/test_rebuild_database.py +224 -0
- PyHardLinkBackup/utilities/__init__.py +0 -0
- PyHardLinkBackup/utilities/file_hash_database.py +62 -0
- PyHardLinkBackup/utilities/file_size_database.py +46 -0
- PyHardLinkBackup/utilities/filesystem.py +158 -0
- PyHardLinkBackup/utilities/humanize.py +39 -0
- PyHardLinkBackup/utilities/rich_utils.py +99 -0
- PyHardLinkBackup/utilities/sha256sums.py +61 -0
- PyHardLinkBackup/utilities/tee.py +40 -0
- PyHardLinkBackup/utilities/tests/__init__.py +0 -0
- PyHardLinkBackup/utilities/tests/test_file_hash_database.py +143 -0
- PyHardLinkBackup/utilities/tests/test_file_size_database.py +138 -0
- PyHardLinkBackup/utilities/tests/test_filesystem.py +126 -0
- PyHardLinkBackup/utilities/tyro_cli_shared_args.py +12 -0
- pyhardlinkbackup-1.5.0.dist-info/METADATA +600 -0
- pyhardlinkbackup-1.5.0.dist-info/RECORD +42 -0
- pyhardlinkbackup-1.5.0.dist-info/WHEEL +4 -0
- pyhardlinkbackup-1.5.0.dist-info/entry_points.txt +3 -0
PyHardLinkBackup/tests/test_compare_backup.py
@@ -0,0 +1,86 @@
```python
import shutil
import tempfile
from pathlib import Path
from unittest import TestCase

from bx_py_utils.test_utils.redirect import RedirectOut
from cli_base.cli_tools.test_utils.base_testcases import OutputMustCapturedTestCaseMixin

from PyHardLinkBackup.compare_backup import CompareResult, LoggingManager, compare_tree
from PyHardLinkBackup.logging_setup import DEFAULT_LOG_FILE_LEVEL
from PyHardLinkBackup.utilities.file_hash_database import FileHashDatabase
from PyHardLinkBackup.utilities.file_size_database import FileSizeDatabase
from PyHardLinkBackup.utilities.filesystem import hash_file


class CompareBackupTestCase(OutputMustCapturedTestCaseMixin, TestCase):
    def test_happy_path(self):
        with tempfile.TemporaryDirectory() as src_dir, tempfile.TemporaryDirectory() as backup_dir:
            src_root = Path(src_dir).resolve()
            backup_root = Path(backup_dir).resolve()

            # Setup backup structure
            phlb_conf_dir = backup_root / '.phlb'
            phlb_conf_dir.mkdir()

            compare_main_dir = backup_root / src_root.name
            compare_main_dir.mkdir()

            timestamp = '2026-01-17-120000'
            compare_dir = compare_main_dir / timestamp
            compare_dir.mkdir()

            # Create source files
            (src_root / 'small_file.txt').write_text('hello world')
            (src_root / 'large_file_missing.txt').write_bytes(b'X' * FileSizeDatabase.MIN_SIZE)
            large_file_in_dbs = src_root / 'large_file_in_dbs.txt'
            large_file_in_dbs.write_bytes(b'Y' * (FileSizeDatabase.MIN_SIZE + 1))

            # Copy files to backup
            total_size = 0
            total_file_count = 0
            for file_path in src_root.iterdir():
                shutil.copy2(file_path, compare_dir / file_path.name)
                total_size += file_path.stat().st_size
                total_file_count += 1
            self.assertEqual(total_file_count, 3)
            self.assertEqual(total_size, 2012)

            # Create databases and add values from 'large_file_in_dbs.txt'
            size_db = FileSizeDatabase(phlb_conf_dir)
            size_db.add(FileSizeDatabase.MIN_SIZE + 1)
            hash_db = FileHashDatabase(backup_root, phlb_conf_dir)
            src_hash = hash_file(large_file_in_dbs)
            hash_db[src_hash] = compare_dir / 'large_file_in_dbs.txt'

            # Run compare_tree
            with RedirectOut() as redirected_out:
                result = compare_tree(
                    src_root=src_root,
                    backup_root=backup_root,
                    excludes=(),
                    log_manager=LoggingManager(
                        console_level='info',
                        file_level=DEFAULT_LOG_FILE_LEVEL,
                    ),
                )
            self.assertEqual(redirected_out.stderr, '')
            self.assertIn('Compare completed.', redirected_out.stdout)
            self.assertEqual(
                result,
                CompareResult(
                    compare_dir=compare_dir,
                    log_file=result.log_file,
                    total_file_count=total_file_count,
                    total_size=total_size,
                    src_file_new_count=0,
                    file_size_missmatch=0,
                    file_hash_missmatch=0,
                    small_file_count=1,
                    size_db_missing_count=1,
                    hash_db_missing_count=1,
                    successful_file_count=total_file_count,
                    error_count=0,
                ),
                redirected_out.stdout,
            )
```
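A quick sanity check of the hard-coded expectations above: the three source files are 11 bytes (`'hello world'`), `MIN_SIZE` bytes of `'X'`, and `MIN_SIZE + 1` bytes of `'Y'`, so with `MIN_SIZE = 1000` (see `file_size_database.py` below) the totals work out exactly as the test asserts:

```python
# 11 + 1000 + 1001 == 2012 bytes across 3 files:
assert len('hello world') + 1000 + (1000 + 1) == 2012
```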
PyHardLinkBackup/tests/test_doc_write.py
@@ -0,0 +1,26 @@
```python
from unittest import TestCase

from bx_py_utils.doc_write.api import GeneratedInfo, generate
from bx_py_utils.path import assert_is_file

from PyHardLinkBackup.cli_dev import PACKAGE_ROOT


class DocuWriteApiTestCase(TestCase):
    def test_up2date_docs(self):
        """DocWrite: about-docs.md # generate Doc-Write

        These documentation files are generated automatically with the "Doc-Write" tool.
        They are updated automatically by unit tests.

        More information about Doc-Write can be found here:

        https://github.com/boxine/bx_py_utils/tree/master/bx_py_utils/doc_write
        """
        assert_is_file(PACKAGE_ROOT / 'pyproject.toml')

        with self.assertLogs():
            info: GeneratedInfo = generate(base_path=PACKAGE_ROOT)
        self.assertGreaterEqual(len(info.paths), 1)
        self.assertEqual(info.update_count, 0, 'No files should be updated, commit the changes')
        self.assertEqual(info.remove_count, 0, 'No files should be removed, commit the changes')
```
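For readers unfamiliar with the mechanism: Doc-Write scans docstrings that start with a `DocWrite:` marker and assembles them into Markdown files. A minimal sketch of the convention, modeled on the real markers used in `file_hash_database.py` further down in this diff (the exact parsing rules live in `bx_py_utils`, not here):

```python
def any_function():
    """DocWrite: about-docs.md # generate Doc-Write
    This paragraph would be collected into `about-docs.md`
    under the heading "generate Doc-Write".
    """
```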
PyHardLinkBackup/tests/test_project_setup.py
@@ -0,0 +1,46 @@
```python
import subprocess
from unittest import TestCase

from bx_py_utils.path import assert_is_file
from cli_base.cli_tools.code_style import assert_code_style
from cli_base.cli_tools.subprocess_utils import ToolsExecutor
from manageprojects.test_utils.project_setup import check_editor_config, get_py_max_line_length
from packaging.version import Version

from PyHardLinkBackup import __version__
from PyHardLinkBackup.cli_dev import PACKAGE_ROOT


class ProjectSetupTestCase(TestCase):
    def test_version(self):
        self.assertIsNotNone(__version__)

        version = Version(__version__)  # Will raise InvalidVersion() if wrongly formatted
        self.assertEqual(str(version), __version__)

        cli_bin = PACKAGE_ROOT / 'cli.py'
        assert_is_file(cli_bin)

        output = subprocess.check_output([cli_bin, 'version'], text=True)
        self.assertIn(f'PyHardLinkBackup v{__version__}', output)

        dev_cli_bin = PACKAGE_ROOT / 'dev-cli.py'
        assert_is_file(dev_cli_bin)

        output = subprocess.check_output([dev_cli_bin, 'version'], text=True)
        self.assertIn(f'PyHardLinkBackup v{__version__}', output)

    def test_code_style(self):
        return_code = assert_code_style(package_root=PACKAGE_ROOT)
        self.assertEqual(return_code, 0, 'Code style error, see output above!')

    def test_check_editor_config(self):
        check_editor_config(package_root=PACKAGE_ROOT)

        max_line_length = get_py_max_line_length(package_root=PACKAGE_ROOT)
        self.assertEqual(max_line_length, 119)

    def test_pre_commit_hooks(self):
        executor = ToolsExecutor(cwd=PACKAGE_ROOT)
        for command in ('migrate-config', 'validate-config', 'validate-manifest'):
            executor.verbose_check_call('pre-commit', command, exit_on_error=True)
```
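The `test_pre_commit_hooks` loop drives pre-commit's own maintenance commands through `ToolsExecutor`. A rough stand-alone equivalent, assuming `pre-commit` is on the `PATH` (the real `verbose_check_call()` additionally echoes each call and can exit on error):

```python
import subprocess

# What the loop boils down to, minus ToolsExecutor's output handling:
for command in ('migrate-config', 'validate-config', 'validate-manifest'):
    subprocess.check_call(['pre-commit', command])
```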
PyHardLinkBackup/tests/test_readme.py
@@ -0,0 +1,75 @@
```python
from bx_py_utils.auto_doc import assert_readme_block
from bx_py_utils.path import assert_is_file
from cli_base.cli_tools.test_utils.rich_test_utils import NoColorEnvRich, invoke
from manageprojects.tests.base import BaseTestCase

from PyHardLinkBackup import constants
from PyHardLinkBackup.cli_dev import PACKAGE_ROOT


def assert_cli_help_in_readme(text_block: str, marker: str):
    README_PATH = PACKAGE_ROOT / 'README.md'
    assert_is_file(README_PATH)

    text_block = text_block.replace(constants.CLI_EPILOG, '')
    text_block = f'```\n{text_block.strip()}\n```'
    assert_readme_block(
        readme_path=README_PATH,
        text_block=text_block,
        start_marker_line=f'[comment]: <> (✂✂✂ auto generated {marker} start ✂✂✂)',
        end_marker_line=f'[comment]: <> (✂✂✂ auto generated {marker} end ✂✂✂)',
    )


class ReadmeTestCase(BaseTestCase):

    def test_main_help(self):
        with NoColorEnvRich():
            stdout = invoke(cli_bin=PACKAGE_ROOT / 'cli.py', args=['--help'], strip_line_prefix='usage: ')

        self.assert_in_content(
            got=stdout,
            parts=(
                'usage: ./cli.py [-h]',
                ' version ',
                'Print version and exit',
                constants.CLI_EPILOG,
            ),
        )

        # Installed via pipx, the command is called 'phlb', not 'cli.py':
        stdout = stdout.replace('./cli.py', 'phlb')

        assert_cli_help_in_readme(text_block=stdout, marker='main help')

    def test_backup_help(self):
        with NoColorEnvRich():
            stdout = invoke(cli_bin=PACKAGE_ROOT / 'cli.py', args=['backup', '--help'], strip_line_prefix='usage: ')
        self.assert_in_content(
            got=stdout,
            parts=(
                'usage: ./cli.py backup [-h] ',
                'Backup the source directory to the destination',
            ),
        )

        # Installed via pipx, the command is called 'phlb', not 'cli.py':
        stdout = stdout.replace('./cli.py', 'phlb')

        assert_cli_help_in_readme(text_block=stdout, marker='backup help')

    def test_dev_help(self):
        with NoColorEnvRich():
            stdout = invoke(cli_bin=PACKAGE_ROOT / 'dev-cli.py', args=['--help'], strip_line_prefix='usage: ')
        self.assert_in_content(
            got=stdout,
            parts=(
                'usage: ./dev-cli.py [-h]',
                ' lint ',
                ' coverage ',
                ' update-readme-history ',
                ' publish ',
                constants.CLI_EPILOG,
            ),
        )
        assert_cli_help_in_readme(text_block=stdout, marker='dev help')
```
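For orientation, `assert_readme_block()` rewrites everything between a matching pair of marker comments in `README.md`. With the `'main help'` marker, the relevant README region looks roughly like this (the content in between is the fenced `--help` output regenerated by the test):

```
[comment]: <> (✂✂✂ auto generated main help start ✂✂✂)
...fenced `--help` output, with './cli.py' rewritten to 'phlb'...
[comment]: <> (✂✂✂ auto generated main help end ✂✂✂)
```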
PyHardLinkBackup/tests/test_rebuild_database.py
@@ -0,0 +1,224 @@
```python
import logging
import tempfile
import textwrap
from pathlib import Path
from unittest.mock import patch

from bx_py_utils.test_utils.redirect import RedirectOut
from cli_base.cli_tools.test_utils.base_testcases import BaseTestCase
from freezegun import freeze_time

from PyHardLinkBackup import rebuild_databases
from PyHardLinkBackup.logging_setup import NoopLoggingManager
from PyHardLinkBackup.rebuild_databases import RebuildResult, rebuild, rebuild_one_file
from PyHardLinkBackup.utilities.file_size_database import FileSizeDatabase


def sorted_rglob_paths(path: Path):
    return sorted([str(p.relative_to(path)) for p in path.rglob('*')])


def sorted_rglob_files(path: Path):
    return sorted([str(p.relative_to(path)) for p in path.rglob('*') if p.is_file()])


class RebuildDatabaseTestCase(BaseTestCase):
    maxDiff = None

    def test_happy_path(self):
        with tempfile.TemporaryDirectory() as temp_dir:
            temp_path = Path(temp_dir).resolve()

            backup_root = temp_path / 'backup'

            with self.assertRaises(SystemExit), RedirectOut() as redirected_out:
                rebuild(backup_root, log_manager=NoopLoggingManager())

            self.assertEqual(redirected_out.stderr, '')
            self.assertEqual(redirected_out.stdout, f'Error: Backup directory "{backup_root}" does not exist!\n')

            backup_root.mkdir()

            with self.assertRaises(SystemExit), RedirectOut() as redirected_out:
                rebuild(backup_root, log_manager=NoopLoggingManager())

            self.assertEqual(redirected_out.stderr, '')
            self.assertIn('hidden ".phlb" configuration directory is missing', redirected_out.stdout)

            phlb_conf_dir = backup_root / '.phlb'
            phlb_conf_dir.mkdir()

            #######################################################################################
            # Run on empty backup directory:

            self.assertEqual(sorted_rglob_paths(backup_root), ['.phlb'])

            with (
                self.assertLogs('PyHardLinkBackup', level=logging.DEBUG),
                RedirectOut() as redirected_out,
                freeze_time('2026-01-16T12:34:56Z', auto_tick_seconds=0),
            ):
                rebuild_result = rebuild(backup_root, log_manager=NoopLoggingManager())
            self.assertEqual(
                rebuild_result,
                RebuildResult(
                    process_count=0,
                    process_size=0,
                    added_size_count=0,
                    added_hash_count=0,
                    error_count=0,
                ),
            )
            self.assertEqual(
                sorted_rglob_paths(backup_root),
                [
                    '.phlb',
                    '.phlb/hash-lookup',
                    '.phlb/size-lookup',
                    '2026-01-16-123456-rebuild-summary.txt',
                ],
            )
            self.assertEqual(redirected_out.stderr, '')

            #######################################################################################
            # Add one backed-up file and run again:

            snapshot_path = backup_root / 'source-name' / '2026-01-15-181709'
            snapshot_path.mkdir(parents=True)

            minimum_file_content = 'X' * FileSizeDatabase.MIN_SIZE
            (snapshot_path / 'file1.txt').write_text(minimum_file_content)

            with (
                self.assertLogs('PyHardLinkBackup', level=logging.DEBUG),
                RedirectOut() as redirected_out,
                freeze_time('2026-01-16T12:34:56Z', auto_tick_seconds=0),
            ):
                rebuild_result = rebuild(backup_root, log_manager=NoopLoggingManager())
            self.assertEqual(
                sorted_rglob_paths(backup_root),
                [
                    '.phlb',
                    '.phlb/hash-lookup',
                    '.phlb/hash-lookup/bb',
                    '.phlb/hash-lookup/bb/c4',
                    '.phlb/hash-lookup/bb/c4/bbc4de2ca238d1ec41fb622b75b5cf7d31a6d2ac92405043dd8f8220364fefc8',
                    '.phlb/size-lookup',
                    '.phlb/size-lookup/10',
                    '.phlb/size-lookup/10/00',
                    '.phlb/size-lookup/10/00/1000',
                    '2026-01-16-123456-rebuild-summary.txt',
                    'source-name',
                    'source-name/2026-01-15-181709',
                    'source-name/2026-01-15-181709/SHA256SUMS',
                    'source-name/2026-01-15-181709/file1.txt',
                ],
            )
            self.assertEqual(
                sorted_rglob_files(backup_root),
                [
                    '.phlb/hash-lookup/bb/c4/bbc4de2ca238d1ec41fb622b75b5cf7d31a6d2ac92405043dd8f8220364fefc8',
                    '.phlb/size-lookup/10/00/1000',
                    '2026-01-16-123456-rebuild-summary.txt',
                    'source-name/2026-01-15-181709/SHA256SUMS',
                    'source-name/2026-01-15-181709/file1.txt',
                ],
            )
            self.assertEqual(redirected_out.stderr, '')
            self.assertEqual(
                rebuild_result,
                RebuildResult(
                    process_count=1,
                    process_size=1000,
                    added_size_count=1,
                    added_hash_count=1,
                    error_count=0,
                    hash_verified_count=0,
                    hash_mismatch_count=0,
                    hash_not_found_count=1,
                ),
            )
            self.assertEqual(
                (snapshot_path / 'SHA256SUMS').read_text(),
                'bbc4de2ca238d1ec41fb622b75b5cf7d31a6d2ac92405043dd8f8220364fefc8 file1.txt\n',
            )

            #######################################################################################
            # Add a file with the same content and run again:

            minimum_file_content = 'X' * FileSizeDatabase.MIN_SIZE
            (snapshot_path / 'same_content.txt').write_text(minimum_file_content)

            with (
                self.assertLogs('PyHardLinkBackup', level=logging.DEBUG) as logs,
                RedirectOut() as redirected_out,
                freeze_time('2026-01-16T12:34:56Z', auto_tick_seconds=0),
            ):
                rebuild_result = rebuild(backup_root, log_manager=NoopLoggingManager())
            # No new hash or size entries, just the new file:
            self.assertEqual(
                sorted_rglob_files(backup_root),
                [
                    '.phlb/hash-lookup/bb/c4/bbc4de2ca238d1ec41fb622b75b5cf7d31a6d2ac92405043dd8f8220364fefc8',
                    '.phlb/size-lookup/10/00/1000',
                    '2026-01-16-123456-rebuild-summary.txt',
                    'source-name/2026-01-15-181709/SHA256SUMS',
                    'source-name/2026-01-15-181709/file1.txt',
                    'source-name/2026-01-15-181709/same_content.txt',
                ],
            )
            self.assertEqual(redirected_out.stderr, '')
            self.assertEqual(
                rebuild_result,
                RebuildResult(
                    process_count=3,
                    process_size=2000,
                    added_size_count=0,
                    added_hash_count=0,
                    error_count=0,
                    hash_verified_count=1,  # Existing file verified successfully
                    hash_mismatch_count=0,
                    hash_not_found_count=1,  # One file added
                ),
                '\n'.join(logs.output) + redirected_out.stdout,
            )
            self.assertEqual(
                (snapshot_path / 'SHA256SUMS').read_text(),
                textwrap.dedent("""\
                    bbc4de2ca238d1ec41fb622b75b5cf7d31a6d2ac92405043dd8f8220364fefc8 file1.txt
                    bbc4de2ca238d1ec41fb622b75b5cf7d31a6d2ac92405043dd8f8220364fefc8 same_content.txt
                """),
            )

            #######################################################################################
            # Test error handling

            def rebuild_one_file_mock(*, entry, **kwargs):
                if entry.name == 'file1.txt':
                    raise IOError('Bam!')
                return rebuild_one_file(entry=entry, **kwargs)

            with (
                self.assertLogs('PyHardLinkBackup', level=logging.ERROR) as logs,
                RedirectOut() as redirected_out,
                patch.object(rebuild_databases, 'rebuild_one_file', rebuild_one_file_mock),
            ):
                rebuild_result = rebuild(backup_root, log_manager=NoopLoggingManager())
            logs = ''.join(logs.output)
            self.assertIn(f'Backup {snapshot_path}/file1.txt OSError: Bam!\n', logs)
            self.assertIn('\nTraceback (most recent call last):\n', logs)
            self.assertEqual(redirected_out.stderr, '')

            self.assertEqual(
                rebuild_result,
                RebuildResult(
                    process_count=2,
                    process_size=1000,
                    added_size_count=0,
                    added_hash_count=0,
                    error_count=1,  # <<< one file caused an error
                    hash_verified_count=1,
                    hash_mismatch_count=0,
                    hash_not_found_count=0,
                ),
            )
```
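The lookup paths asserted above come from the two-level fan-out both databases use (see the utilities below): the first two and the next two characters of the hex digest, or of the decimal size string, become intermediate directories. Re-deriving the test's expected paths:

```python
import hashlib

# 1000 'X' bytes is the file content used in the test:
digest = hashlib.sha256(b'X' * 1000).hexdigest()  # 'bbc4de2c...' per the test
print(f'{digest[:2]}/{digest[2:4]}/{digest}')  # -> bb/c4/bbc4de2c...

size_str = str(1000)
print(f'{size_str[:2]}/{size_str[2:4]}/{size_str}')  # -> 10/00/1000
```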
PyHardLinkBackup/utilities/__init__.py (file without changes)
PyHardLinkBackup/utilities/file_hash_database.py
@@ -0,0 +1,62 @@
```python
import logging
from pathlib import Path


class HashAlreadyExistsError(ValueError):
    pass


class FileHashDatabase:
    """DocWrite: README.md ## FileHashDatabase
    A simple "database" to store file content hash <-> relative path mappings.
    Uses a directory structure to avoid too many files in a single directory.
    Path structure:
    {base_dst}/.phlb/hash-lookup/{XX}/{YY}/{hash}
    e.g.:
    hash '12ab000a1b2c3...' results in: {base_dst}/.phlb/hash-lookup/12/ab/12ab000a1b2c3...

    Notes:
    * The hash length is not validated, so any hash algorithm can be used.
    * The stored "relative path" is not validated, so it can be any string.
    * We don't cache anything in memory, to avoid high memory consumption with large datasets.
    """

    def __init__(self, backup_root: Path, phlb_conf_dir: Path):
        self.backup_root = backup_root
        self.base_path = phlb_conf_dir / 'hash-lookup'
        self.base_path.mkdir(parents=False, exist_ok=True)

    def _get_hash_path(self, hash: str) -> Path:
        first_dir_name = hash[:2]
        second_dir_name = hash[2:4]
        hash_path = self.base_path / first_dir_name / second_dir_name / hash
        return hash_path

    def __contains__(self, hash: str) -> bool:
        hash_path = self._get_hash_path(hash)
        return hash_path.exists()

    def get(self, hash: str) -> Path | None:
        hash_path = self._get_hash_path(hash)
        try:
            rel_file_path = hash_path.read_text()
        except FileNotFoundError:
            return None
        else:
            abs_file_path = self.backup_root / rel_file_path
            if not abs_file_path.is_file():
                logging.warning('Hash database entry found, but file does not exist: %s', abs_file_path)
                hash_path.unlink()
                return None
            return abs_file_path

    def __setitem__(self, hash: str, abs_file_path: Path):
        hash_path = self._get_hash_path(hash)
        hash_path.parent.mkdir(parents=True, exist_ok=True)

        # An existing hash should have been found earlier and resulted in hardlink creation!
        # So deny changing existing hashes:
        if hash_path.exists():
            raise HashAlreadyExistsError(f'Hash {hash} already exists in the database!')

        hash_path.write_text(str(abs_file_path.relative_to(self.backup_root)))
```
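A minimal usage sketch of the class above. The backup root is hypothetical, and the `.phlb` directory must already exist because `__init__()` creates `hash-lookup` with `parents=False`:

```python
from pathlib import Path

backup_root = Path('/mnt/backups')  # hypothetical backup root, '.phlb' already created
hash_db = FileHashDatabase(backup_root, backup_root / '.phlb')

digest = 'bbc4de2ca238d1ec41fb622b75b5cf7d31a6d2ac92405043dd8f8220364fefc8'
if digest in hash_db:
    existing = hash_db.get(digest)  # absolute Path, or None if the file vanished
else:
    # Remember the first file seen with this content (stored as a relative path):
    hash_db[digest] = backup_root / 'source-name' / '2026-01-15-181709' / 'file1.txt'
```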
PyHardLinkBackup/utilities/file_size_database.py
@@ -0,0 +1,46 @@
```python
from pathlib import Path


class FileSizeDatabase:
    """DocWrite: README.md ## FileSizeDatabase
    A simple "database" to track which file sizes have been seen.

    Uses a directory structure to avoid too many files in a single directory.
    We don't cache anything in memory, to avoid high memory consumption with large datasets.
    """

    MIN_SIZE = 1000  # Size strings are not zero-padded, so the minimum size is 1000 bytes!

    def __init__(self, phlb_conf_dir: Path):
        self.base_path = phlb_conf_dir / 'size-lookup'
        self.base_path.mkdir(parents=False, exist_ok=True)

    def _get_size_path(self, size: int) -> Path:
        assert size >= self.MIN_SIZE, f'Size must be at least {self.MIN_SIZE} bytes'
        size_str = str(size)

        """DocWrite: README.md ## FileSizeDatabase
        Path structure:
        * `{base_dst}/.phlb/size-lookup/{XX}/{YY}/{size}`

        e.g.:

        * `1234567890` results in: `{base_dst}/.phlb/size-lookup/12/34/1234567890`
        """
        first_dir_name = size_str[:2]
        second_dir_name = size_str[2:4]
        size_path = self.base_path / first_dir_name / second_dir_name / size_str
        return size_path

    def __contains__(self, size: int) -> bool:
        size_path = self._get_size_path(size)
        return size_path.exists()

    def add(self, size: int):
        size_path = self._get_size_path(size)
        if not size_path.exists():
            size_path.parent.mkdir(parents=True, exist_ok=True)

            """DocWrite: README.md ## FileSizeDatabase
            All files are created empty, as we only care about their existence."""
            size_path.touch(exist_ok=False)
```