backup-helper-0.2.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (29)
  1. backup-helper-0.2/PKG-INFO +133 -0
  2. backup-helper-0.2/README.md +116 -0
  3. backup-helper-0.2/backup_helper/__init__.py +0 -0
  4. backup-helper-0.2/backup_helper/__main__.py +11 -0
  5. backup-helper-0.2/backup_helper/backup_helper.py +193 -0
  6. backup-helper-0.2/backup_helper/cli.py +363 -0
  7. backup-helper-0.2/backup_helper/disk_work_queue.py +271 -0
  8. backup-helper-0.2/backup_helper/exceptions.py +52 -0
  9. backup-helper-0.2/backup_helper/helpers.py +98 -0
  10. backup-helper-0.2/backup_helper/interactive.py +194 -0
  11. backup-helper-0.2/backup_helper/py.typed +0 -0
  12. backup-helper-0.2/backup_helper/source.py +322 -0
  13. backup-helper-0.2/backup_helper/target.py +128 -0
  14. backup-helper-0.2/backup_helper/work.py +135 -0
  15. backup-helper-0.2/backup_helper.egg-info/PKG-INFO +133 -0
  16. backup-helper-0.2/backup_helper.egg-info/SOURCES.txt +27 -0
  17. backup-helper-0.2/backup_helper.egg-info/dependency_links.txt +1 -0
  18. backup-helper-0.2/backup_helper.egg-info/entry_points.txt +2 -0
  19. backup-helper-0.2/backup_helper.egg-info/requires.txt +4 -0
  20. backup-helper-0.2/backup_helper.egg-info/top_level.txt +1 -0
  21. backup-helper-0.2/pyproject.toml +38 -0
  22. backup-helper-0.2/setup.cfg +4 -0
  23. backup-helper-0.2/tests/test_backup_helper.py +525 -0
  24. backup-helper-0.2/tests/test_disk_work_queue.py +339 -0
  25. backup-helper-0.2/tests/test_helpers.py +69 -0
  26. backup-helper-0.2/tests/test_interactive.py +53 -0
  27. backup-helper-0.2/tests/test_source.py +661 -0
  28. backup-helper-0.2/tests/test_system.py +364 -0
  29. backup-helper-0.2/tests/test_target.py +153 -0
backup-helper-0.2/PKG-INFO @@ -0,0 +1,133 @@

Metadata-Version: 2.1
Name: backup-helper
Version: 0.2
Summary: Helper tool for creating plain-file cold-storage archives including checksum files
Author-email: omgitsmoe <60219950+omgitsmoe@users.noreply.github.com>
Project-URL: Homepage, https://github.com/omgitsmoe/backup_helper
Project-URL: Bug Tracker, https://github.com/omgitsmoe/backup_helper/issues
Keywords: script,verify,backup,archival,bit-rot
Classifier: Programming Language :: Python :: 3
Classifier: License :: OSI Approved :: MIT License
Classifier: Operating System :: OS Independent
Requires-Python: >=3.8
Description-Content-Type: text/markdown
Requires-Dist: checksum_helper<0.3,>=0.2.2
Provides-Extra: test
Requires-Dist: pytest<8,>=7.2; extra == "test"

# BackupHelper

A tool for simplifying the process of archiving multiple directories
onto several different drives. For each directory a checksum file
will be created, which will be verified after the transfer.

You can stage multiple sources and add targets to them.
Once you're done you can start the transfer, which will run
all copy operations concurrently while making sure that no disk
involved in a transfer is busy with more than one BackupHelper
operation at a time.

## Quick start

Add a directory as a source for copying/archiving:

```
$ python -m backup_helper stage ~/Documents --alias docs
Staged: /home/m/Documents
with alias: docs
```

By default the BackupHelper state will be saved in the file
`backup_status.json` in the current working directory.
Alternatively a custom path can be used by passing
`--status-file /path/to/status.json` to __each__ command.
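
The status file can also be handled programmatically. Below is a minimal
sketch, assuming use of the `load_backup_state_save_always` helper defined
in `backup_helper/backup_helper.py` further down in this diff; the path is
illustrative:

```
from backup_helper.backup_helper import load_backup_state_save_always

# Loads /path/to/status.json if it exists (otherwise starts empty)
# and writes the state back to the same path on a clean exit.
with load_backup_state_save_always("/path/to/status.json") as bh:
    print(bh.status_all())
```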

Add targets to that source. Either the normalized absolute path
can be used as `source`, or the alias (here: _"docs"_) if present:

```
$ python -m backup_helper add-target docs /media/storage1/docs_2024 --alias storage1
Added target /media/storage1/docs_2024
with alias: storage1
$ python -m backup_helper add-target docs /media/storage2/docs_2024 --alias storage2
Added target /media/storage2/docs_2024
with alias: storage2
```

Now you can use the `start` command to run the whole backup process
in sequence:

```
$ python -m backup_helper start
18:22:01 - INFO - Wrote /home/m/Documents/Documents_bh_2024-02-25T18-22-01.cshd
...
18:22:02 - INFO -

NO MISSING FILES!

NO FAILED CHECKSUMS!

SUMMARY:
    TOTAL FILES: 3
    MATCHES: 3
    FAILED CHECKSUMS: 0
    MISSING: 0

...

18:22:02 - INFO - /home/m/Documents/Documents_bh_2024-02-25T18-22-01.cshd: No missing files and all files matching their hashes

...

18:22:02 - INFO - Successfully completed the following 5 operation(s):
Hashed '/home/m/Documents':
    Hash file: /home/m/Documents/Documents_bh_2024-02-25T18-22-01.cshd
Transfer successful:
    From: /home/m/Documents
    To: /media/storage1/docs_2024
Transfer successful:
    From: /home/m/Documents
    To: /media/storage2/docs_2024
Verified transfer '/media/storage1/docs_2024':
    Checked: 3
    CRC Errors: 0
    Missing: 0
Verified transfer '/media/storage2/docs_2024':
    Checked: 3
    CRC Errors: 0
    Missing: 0
```

Each part of the backup process can be run on its own and on a
specific source/target combination only. For more information
see the [backup process section](#backup-process).

## Backup process

The backup process, which can be run automatically using the
`start` command, is split into the following steps:

1) Hash all source directories. The checksum file will be added to
   the directory. A log of the checksum-file creation will be
   written next to the status JSON file.
2) Transfer all sources to their targets. Only one read __or__ write
   operation per disk is allowed at a time.
3) Verify the transfer by comparing the hashes in the generated
   checksum file with the hashes of the files on the target.
   A log of the verification process will be written to the target.

The verification step (3) is run last when there are further
transfer operations on a disk, so that:

1) the more expensive write operations are performed first, and
2) the transferred files are less likely to still be in the OS cache
   when they are re-read for hashing.

Each part of the backup process can be run on its own and/or on a
specific source/target combination only; required previous steps
will be run automatically, as the sketch below illustrates.
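
These steps map directly onto methods of the `BackupHelper` class defined
in `backup_helper/backup_helper.py` later in this diff. A minimal sketch of
driving the phases from Python, assuming a status file in the current
directory:

```
from backup_helper.backup_helper import BackupHelper

bh = BackupHelper.load_state("backup_status.json")
bh.hash_all()      # step 1: checksum every staged source
bh.transfer_all()  # step 2: copy each source to its targets
bh.verify_all()    # step 3: re-hash the copies and compare
# or queue all three phases at once, roughly what `start` does:
# bh.start_all()
bh.save_state("backup_status.json")
```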

Using the `interactive` command it's possible to add sources/targets
while a transfer is running; otherwise all running operations would
need to complete before further commands could be executed.

## Commands

See `python -m backup_helper --help`

backup-helper-0.2/README.md @@ -0,0 +1,116 @@ (content identical to the package description embedded in PKG-INFO above)
backup-helper-0.2/backup_helper/__init__.py: file without changes (empty file)

backup-helper-0.2/backup_helper/__main__.py @@ -0,0 +1,11 @@

import sys

from backup_helper import cli


def main():
    cli.main(sys.argv[1:])


if __name__ == '__main__':
    main()
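
Passing `sys.argv[1:]` into `cli.main` explicitly keeps the entry point
testable. A purely illustrative direct call, assuming `cli.main` parses an
argv-style list as shown above (an argparse-backed `--help` would raise
`SystemExit`):

```
from backup_helper import cli

# equivalent to: python -m backup_helper --help
cli.main(["--help"])
```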

backup-helper-0.2/backup_helper/backup_helper.py @@ -0,0 +1,193 @@

import sys
import os
import dataclasses
import json
import contextlib
import logging
import time

from typing import (
    List, Optional, Dict, Any, Union, cast, Callable, Set,
    Iterator, Iterable, TYPE_CHECKING, Tuple, overload
)

from backup_helper import helpers
from backup_helper.exceptions import (
    SourceNotFound, TargetNotFound,
    SourceAlreadyExists, AliasAlreadyExists,
)
from backup_helper.source import Source
from backup_helper.target import Target
from backup_helper import work

logger = logging.getLogger(__name__)


class BackupHelper:
    def __init__(self, sources: List[Source]):
        self._sources = {}
        # don't serialize this, will be set when loading, so the file can be moved!
        self._working_dir = '.'
        for source in sources:
            self._sources[source.path] = source
            if source.alias:
                self._sources[source.alias] = source
        self._queue = work.setup_work_queue([])

    @classmethod
    def load_state(cls, path: str) -> 'BackupHelper':
        if os.path.exists(path):
            with open(path, "r", encoding='utf-8') as f:
                contents = f.read()
            bh = cls.from_json(contents)
            bh._working_dir = os.path.dirname(path)
            return bh
        else:
            return cls([])

    def to_json(self) -> Dict[Any, Any]:
        result = {"version": 1, "type": type(self).__name__}

        sources: List[Dict[str, Any]] = []
        # sources contain both path as well as alias as keys, so we have to
        # deduplicate them
        for source in self.unique_sources():
            sources.append(source.to_json())

        result["sources"] = sources

        return result

    @staticmethod
    def from_json(json_str: str) -> 'BackupHelper':
        d = json.loads(json_str, object_hook=BackupHelper.from_json_hook)
        return d

    @staticmethod
    def from_json_hook(json_object: Dict[Any, Any]) -> Union[
            'BackupHelper', Source, Target, Dict[Any, Any]]:
        # if this is used as object_hook in json.loads it will be called
        # repeatedly as the object is built bottom-up

        if "type" not in json_object:
            return json_object

        # version = json_object["version"]
        obj_type = json_object["type"]

        # dispatch to the appropriate from_json method
        if obj_type == "BackupHelper":
            sources = json_object["sources"]
            return BackupHelper(sources)
        elif obj_type == "Source":
            return Source.from_json(json_object)
        elif obj_type == "Target":
            return Target.from_json(json_object)
        else:
            return json_object
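
    # Note (illustrative): because from_json_hook is passed to json.loads as
    # object_hook, nested Source/Target dicts are rebuilt before the outer
    # BackupHelper dict, so
    #     BackupHelper.from_json(json.dumps(bh.to_json()))
    # round-trips a full object graph through a saved state string.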

    def unique_sources(self) -> Iterator[Source]:
        # sources contain both path as well as alias as keys, so we have to
        # deduplicate them
        yield from helpers.unique_iterator(self._sources.values())

    def save_state(self, path: str):
        d = self.to_json()
        with open(path, "w", encoding='utf-8') as f:
            f.write(json.dumps(d))

    def add_source(self, source: Source):
        if source.path in self._sources:
            raise SourceAlreadyExists(
                f"Source '{source.path}' already exists!", source.path)

        self._sources[source.path] = source
        if source.alias:
            if source.alias in self._sources:
                raise AliasAlreadyExists(
                    f"Alias '{source.alias}' already exists!", source.alias)
            self._sources[source.alias] = source

    def get_source(self, source_key: str) -> Source:
        try:
            return self._sources[source_key]
        except KeyError:
            raise SourceNotFound(
                f"Source '{source_key}' not found!", source_key)

    def hash_all(self) -> None:
        for s in self.unique_sources():
            s.hash_queue(self._queue, log_dir=self._working_dir)
        success, errors = self._queue.start_and_join_all()
        work.report_results(success, errors)

    def transfer_all(self) -> None:
        for src in self.unique_sources():
            src.transfer_queue_all(self._queue)

        success, errors = self._queue.start_and_join_all()
        work.report_results(success, errors)

    def verify_all(self) -> None:
        for src in self.unique_sources():
            src.verify_target_queue_all(self._queue)

        success, errors = self._queue.start_and_join_all()
        work.report_results(success, errors)

    def start_all(self) -> None:
        for src in self.unique_sources():
            src.hash_queue(self._queue, log_dir=self._working_dir)
            src.transfer_queue_all(self._queue)
            src.verify_target_queue_all(self._queue)

        success, errors = self._queue.start_and_join_all()
        work.report_results(success, errors)

    def workers_running(self) -> bool:
        return self._queue.workers_running()

    def join(self) -> None:
        self._queue.join()

    def status(self, source_key: str) -> str:
        try:
            src = self.get_source(source_key)
        except SourceNotFound as e:
            return f"Source '{e.source}' not found!"
        else:
            return src.status()

    def status_all(self) -> str:
        builder = []
        for source in self.unique_sources():
            builder.append(f"--- Source: {source.path} ---")
            builder.append(source.status())

        return "\n".join(builder)


@contextlib.contextmanager
def load_backup_state(
        path: str, instance: Optional[BackupHelper] = None) -> Iterator[BackupHelper]:
    """Context manager that saves state to a crash file on exception."""
    if instance is None:
        bh = BackupHelper.load_state(path)
    else:
        bh = instance

    try:
        yield bh
    except Exception:
        fn, ext = os.path.splitext(path)
        bh.save_state(helpers.unique_filename(f"{fn}_crash{ext}"))
        raise


@contextlib.contextmanager
def load_backup_state_save_always(
        path: str, instance: Optional[BackupHelper] = None) -> Iterator[BackupHelper]:
    """Context manager that saves state on clean exit."""
    with load_backup_state(path, instance) as bh:
        yield bh
        bh.save_state(path)
+ bh.save_state(path)