matterbak 0.4.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- matterbak/__init__.py +6 -0
- matterbak/channeldata.py +148 -0
- matterbak/dump.py +129 -0
- matterbak/hashablematterdata.py +16 -0
- matterbak/ignoresignals.py +106 -0
- matterbak/matterbak.py +430 -0
- matterbak/mattermerge.py +88 -0
- matterbak/mattermostapi.py +147 -0
- matterbak/teams.py +69 -0
- matterbak/users.py +158 -0
- matterbak-0.4.0.dist-info/METADATA +307 -0
- matterbak-0.4.0.dist-info/RECORD +16 -0
- matterbak-0.4.0.dist-info/WHEEL +5 -0
- matterbak-0.4.0.dist-info/entry_points.txt +2 -0
- matterbak-0.4.0.dist-info/licenses/LICENSE +21 -0
- matterbak-0.4.0.dist-info/top_level.txt +1 -0
matterbak/__init__.py
ADDED
matterbak/channeldata.py
ADDED
|
@@ -0,0 +1,148 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Provide class ChannelData
|
|
3
|
+
"""
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
import json
|
|
7
|
+
import pathlib as pl
|
|
8
|
+
|
|
9
|
+
from . import dump
|
|
10
|
+
|
|
11
|
+
# Subdir below a channel's posts dir that receives the files attached to posts
files_subdir = pl.Path('files')
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
class ChannelData:
    # pylint: disable = too-few-public-methods, too-many-instance-attributes
    """Class to store channel data and back it up

    Everything is written below channels_dir: the channel description, its
    member list and its thread relations as JSON files, the posts into a
    per-channel subdir and the files attached to posts into a subdir of that.
    """

    def __init__(self, init, name, channel, channels_dir):
        """Init

        Creates the directory for attached files (and thereby the posts
        directory) and loads thread data from an earlier backup, if any.

        init: the Init instance
        name: name for the channel data file and its subdir
        channel: channel data, must contain the key 'id'
        channels_dir: pathlib.Path with the dir to store the data in

        raise: KeyError if channel has no key 'id'
        """
        self.init = init
        self.name = name
        self.channel = channel
        if 'id' not in self.channel:
            raise KeyError("key 'id' not found in channel")
        self.channels_dir = channels_dir
        self._threads_filename = f"{self.name}{dump.FILENAME_SEPARATOR}{dump.SUFFIX_THREADS}"
        self.posts_dir = (self.channels_dir /
                          dump.make_filename(self.channel['id'], name=self.name))
        self.files_dir = self.posts_dir / files_subdir
        self.files_dir.mkdir(parents=True, exist_ok=True)
        self._load_threads()

    def _get_latest_post_id(self):
        """Return latest ID of posts in posts_dir

        This function assumes that the file names begin with a timestamp, such
        that the latest post has the lexicographically highest name.

        return: post ID contained in the JSON file with the max file name, or
                None if posts_dir contains no JSON file
        """
        post_files = [entry for entry in self.posts_dir.iterdir()
                      if entry.suffix.lower() == dump.JSON_EXTENSION and entry.is_file()]
        if not post_files:
            return None

        latest_post_file = max(post_files, key=lambda entry: entry.name)
        with latest_post_file.open(encoding="utf8") as post_file:
            return json.load(post_file).get('id')

    def _load_threads(self):
        """Load thread data from backup

        Sets self._threads to a dict mapping a root post ID to the set of IDs
        of the posts recorded for that thread. It stays empty if there is no
        threads file or if the file has the old format.
        """
        self._threads = {}

        threads_path = (self.channels_dir /
                        dump.make_filename(
                            self.channel['id'], name=self._threads_filename,
                            extension=dump.JSON_EXTENSION))
        if not threads_path.is_file():
            return

        with threads_path.open(encoding="utf8") as threads_file:
            threads_json = json.load(threads_file)
        # The new file format is a dict with root_ids as keys, the old one
        # was a list of lists. An old file is ignored here and gets
        # overwritten with the new format by backup().
        if isinstance(threads_json, dict):
            self._threads = {root_id: set(post_ids)
                             for root_id, post_ids in threads_json.items()}

    def _save_post(self, post):
        """Backup the files attached to a post

        For each file description in the post's metadata the description is
        dumped as JSON and the file content is downloaded into files_dir.
        A failed download is reported on stdout and does not abort the backup.

        post: post data; a missing or empty 'metadata' entry means no files

        return: number of files successfully downloaded
        """
        num_files = 0
        metadata = post.get("metadata") or {}
        for file_desc in metadata.get("files") or []:
            file_id = file_desc["id"]
            dump.dump_content(self.files_dir, file_desc)
            file_response = self.init.matter.get_file(file_id)
            if file_response.ok:
                # extension is contained in name
                file_dump_path = (self.files_dir /
                                  dump.make_filename(file_id, name=file_desc['name']))
                file_dump_path.write_bytes(file_response.content)
                num_files += 1
            else:
                print(
                    f"Cannot retrieve the file '{file_desc['name']}' "
                    f"posted to channel '{self.name}': {file_response.text}")
        return num_files

    def _update_threads(self, post):
        """Update thread data with new post

        A post with a non-empty 'root_id' is recorded as member of the thread
        of that root post; other posts are ignored.
        """
        root_id = post['root_id']
        if root_id:
            self._threads.setdefault(root_id, set()).add(post['id'])

    def backup(self):
        """Download channel data and all its posts and files

        Prints one progress symbol per post: '+' for a new or updated post,
        '.' for a post whose backup was already up to date.

        return: tuple (number of new or updated posts,
                       number of files downloaded for these posts)
        """

        dump.dump_content(self.channels_dir, self.channel, name=self.name)

        members = self.init.users.get_group_members(self.channel)
        dump.dump_content(
            self.channels_dir, members, id_=self.channel['id'],
            name=f"{self.name}{dump.FILENAME_SEPARATOR}{dump.SUFFIX_MEMBERS}")

        # Without update_old_posts only posts after the latest one already
        # backed up are requested.
        if self.init.options.update_old_posts:
            latest_id = None
        else:
            latest_id = self._get_latest_post_id()

        num_posts = 0
        num_files = 0
        for post in self.init.matter.get_posts_for_channel(self.channel['id'], after=latest_id):
            self.init.rate_limiter.wait()
            progress_symbol = '.'
            old_content = dump.dump_content(
                self.posts_dir, post, with_timestamp=True, return_old_content=True)
            # Files are only downloaded for posts that are new or have changed
            # since the last backup.
            if (not old_content) or (old_content['update_at'] < post['update_at']):
                progress_symbol = '+'
                num_posts += 1
                num_files += self._save_post(post)

            # We update the threads in any case although thread relations cannot be changed
            # because this will update the thread file format.
            self._update_threads(post)

            print(progress_symbol, end='', flush=True)

        # Newline after progress dots
        print()

        # Sets are not JSON serializable, so store the post IDs as lists
        threads_json = {root_id: list(post_ids)
                        for root_id, post_ids in self._threads.items()}
        dump.dump_content(self.channels_dir, threads_json,
                          id_=self.channel['id'], name=self._threads_filename)

        return num_posts, num_files
|
matterbak/dump.py
ADDED
|
@@ -0,0 +1,129 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Provide functions to dump data into JSON files
|
|
3
|
+
"""
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
import datetime
|
|
7
|
+
import json
|
|
8
|
+
import pathlib as pl
|
|
9
|
+
|
|
10
|
+
from .ignoresignals import IgnoreSignals
|
|
11
|
+
|
|
12
|
+
# Extension of the JSON data files
JSON_EXTENSION = '.json'
# Separator between parts of a filename
FILENAME_SEPARATOR = '__'
# Format for timestamps in file names
TIMESTAMP_FORMAT = "%Y%m%d-%H%M%S%f"

# Subdirs below data_dir to store the related downloads
teams_subdir = pl.Path('teams')
groups_subdir = pl.Path('groups')
direct_subdir = pl.Path('direct')
emojis_subdir = pl.Path('emojis')
users_subdir = pl.Path('users')
files_subdir = pl.Path('files')

# Suffixes for types of data files
SUFFIX_MEMBERS = 'members'
SUFFIX_THREADS = 'threads'
SUFFIX_ICON = 'icon'
SUFFIX_IMAGE = 'image'


def make_filename(id_, name=None, extension='', mm_timestamp=None):
    """Build the name of a backup file

    The name consists of the formatted timestamp (if given), the ID and the
    name (if given), joined by FILENAME_SEPARATOR and followed by extension.

    id_: Mattermost ID to insert into the filename
    name: optional name to append
    extension: optional extension for the filename
    mm_timestamp: optional Mattermost timestamp (Unix time in milliseconds),
                  formatted as local time

    return: filename
    """
    parts = [id_]
    if mm_timestamp:
        # Mattermost counts milliseconds, datetime expects seconds
        created = datetime.datetime.fromtimestamp(mm_timestamp / 1000)
        parts.insert(0, created.strftime(TIMESTAMP_FORMAT))
    if name:
        parts.append(name)

    stem = FILENAME_SEPARATOR.join(parts)
    return stem + extension
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
def dump_image(directory, id_, image_loader, label=None, skip_existing=False):
    """Helper to download and save an image from Mattermost

    Calls make_filename with id_, label as name, and extension derived from the
    content type returned from Mattermost.

    Image files already stored for id_ are only replaced after a new image has
    been downloaded successfully. If the request fails or the response is not
    an image, the existing backup is left untouched.

    directory: pathlib.Path of the folder to store the image in
    id_: Mattermost ID as prefix for the filename
    image_loader: function returning an image Response object from Mattermost API
    label: label to append to filename
    skip_existing: if True skip download if image file already exists
    """

    # JSON files with the same ID prefix hold data, not images
    found_image_files = [found for found in directory.glob(id_ + '*')
                         if found.suffix != JSON_EXTENSION and found.is_file()]
    if skip_existing and found_image_files:
        return

    response = image_loader()
    if not response.ok:
        return

    content_type_prefix = 'image/'
    content_type = response.headers.get('content-type', '')
    # Drop optional parameters such as '; charset=utf-8' so that they do not
    # end up in the file extension.
    media_type = content_type.split(';', 1)[0].strip()
    if not media_type.startswith(content_type_prefix):
        print(f"Cannot store image of type '{content_type}' for ID {id_}")
        return
    extension = '.' + media_type.removeprefix(content_type_prefix)

    path = directory / make_filename(id_=id_, name=label, extension=extension)
    with IgnoreSignals():
        # The new image file may have a different extension so delete all
        # other existing image files. The target itself is overwritten below.
        for image in found_image_files:
            if image != path:
                image.unlink(missing_ok=True)
        path.write_bytes(response.content)
|
|
92
|
+
|
|
93
|
+
|
|
94
|
+
def dump_content(directory, content, id_=None, name=None, with_timestamp=False,
                 return_old_content=False):
    # pylint: disable = too-many-arguments, too-many-positional-arguments
    """Write content as JSON file into directory

    The filename is created by make_filename from id_ (if given, else
    content['id']), name and, if with_timestamp is set, content['create_at'].
    The file is written while SIGINT/SIGTERM are held back by IgnoreSignals.

    directory: pathlib.Path of the folder to store the file in
    content: data to store
    id_: Mattermost ID to be integrated into filename, if None use
         content['id'] instead
    name: name (without .json extension) of the file, can be empty
    with_timestamp: set to True to prefix filename with content's creation time
    return_old_content: if True the previous content of the file is loaded
                        before it gets overwritten

    return: previous content if return_old_content is True and the file
            existed, else None
    """

    file_id = id_ if id_ else content['id']
    created_at = None
    if with_timestamp:
        created_at = content["create_at"]

    target = directory / make_filename(
        file_id, name=name, extension=JSON_EXTENSION, mm_timestamp=created_at)

    previous = None
    if return_old_content and target.is_file():
        with target.open(encoding="utf8") as previous_file:
            previous = json.load(previous_file)

    with IgnoreSignals(), target.open(mode="w", encoding="utf8") as dump_file:
        json.dump(content, dump_file)

    return previous
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Provide class HashableMatterData
|
|
3
|
+
"""
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
class HashableMatterData(dict):
    """Extends the dict of a mattermost object by hash method to enable storing in a set

    This class can be initialized with the original dict of the mattermost object.

    Equality and hash are based on the value of the key 'id' only, the
    remaining content of the dict is not compared.
    """

    def __eq__(self, other):
        """Return True if other has the same 'id'

        Operands that cannot be subscripted with 'id' are not comparable;
        NotImplemented is returned for them so that Python falls back to its
        default comparison instead of raising.
        """
        try:
            other_id = other['id']
        except (TypeError, KeyError, IndexError):
            return NotImplemented
        return self['id'] == other_id

    def __ne__(self, other):
        """Return the negation of __eq__

        Must be defined explicitly: otherwise dict.__ne__ is inherited, which
        compares the whole content and contradicts the ID based __eq__.
        """
        result = self.__eq__(other)
        if result is NotImplemented:
            return NotImplemented
        return not result

    def __hash__(self):
        """Return the hash of the 'id' value"""
        return hash(self['id'])
|
|
@@ -0,0 +1,106 @@
|
|
|
1
|
+
"""
|
|
2
|
+
A simple class to temporarily ignore specific signals (e.g., SIGINT, SIGTERM)
|
|
3
|
+
during critical operations like file writing.
|
|
4
|
+
"""
|
|
5
|
+
|
|
6
|
+
import os
|
|
7
|
+
import signal
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
class IgnoreSignals():
    """Context manager to temporarily ignore specified signals"""
    def __init__(
            self,
            signals=None,
            print_message_on_signal=None,
            delay_signals=True):
        """
        Temporarily ignore specified signals (e.g., Ctrl+C, kill) during
        critical operations.

        The handlers that are active when the instance is created are
        remembered and restored when the 'with' context is left.

        Example:

        >>> with IgnoreSignals([signal.SIGINT, signal.SIGTERM]):
        ...     # do critical work like file writing

        signals (list): List of signal numbers to ignore
                        (default: [signal.SIGINT, signal.SIGTERM]).
                        Use `None` to accept the default.
        print_message_on_signal (str or callable or None):
            message to be printed on signal
            * If None (default) the f-string
              f'ignoring signal {signum} until write is finished'
              is used.
            * If a callable (e.g. lambda function):
              called with (signum, frame). This function could print other
              messages built with input.
            * If bool(print_message_on_signal) is True
              the variable print_message_on_signal is printed.
            * Otherwise (e. g. False or '') no output.
        delay_signals:
            If True the ignored signal is raised again just after reverting
            to the original handlers.
            Only the last ignored signal is re-raised.
            Attempting to re-raise multiple signals would risk unpredictable
            behavior and is intentionally omitted.
        """
        if signals is None:
            signals = [signal.SIGINT, signal.SIGTERM]
        self.signals = signals
        self.print_message_on_signal = print_message_on_signal
        self.default_handlers = []
        self.update_default_handlers()
        # Number of the last signal received while ignoring, None if none
        self.ignored_signum = None
        self.delay_signals = delay_signals

    def __enter__(self):
        """Enter 'with' context"""
        self.ignore()
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        """Leave 'with' context"""
        self.revert()

    def update_default_handlers(self):
        """
        store actual signal handlers as default handlers
        """
        self.default_handlers = [[sig, signal.getsignal(sig)]
                                 for sig in self.signals]

    def ignore(self):
        """
        set signal handlers to ignore the signals
        """
        for sig in self.signals:
            signal.signal(sig, self.ignoring_handler)

    def revert(self):
        """
        revert/restore to original/default signal handlers

        More precisely the signal handlers defined during
        class instance creation
        or calling the method `update_default_handlers`

        If delay_signals is set and a signal was ignored, that signal is
        raised again after the handlers have been restored.
        """
        for (sig, handler) in self.default_handlers:
            signal.signal(sig, handler)

        # Forget the pending signal before raising it: the restored handler
        # may raise (e.g. KeyboardInterrupt) and the instance must not keep a
        # stale signal number for a later revert.
        pending_signum, self.ignored_signum = self.ignored_signum, None
        if self.delay_signals and (pending_signum is not None):
            # raise_signal runs the restored handler on every platform;
            # os.kill with SIGINT/SIGTERM would terminate the process
            # unconditionally on Windows.
            signal.raise_signal(pending_signum)

    def ignoring_handler(self, signum, frame):
        """
        handler that ignores the signal and optionally prints a message
        or calls a function

        The signal number is remembered so that revert can raise it again.
        """
        self.ignored_signum = signum
        if self.print_message_on_signal is None:
            print(f'ignoring signal {signum} until write is finished')
        if callable(self.print_message_on_signal):
            self.print_message_on_signal(signum, frame)
        elif self.print_message_on_signal:
            print(self.print_message_on_signal)
|