matterbak 0.4.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- matterbak-0.4.0/LICENSE +21 -0
- matterbak-0.4.0/PKG-INFO +307 -0
- matterbak-0.4.0/README.md +285 -0
- matterbak-0.4.0/pyproject.toml +49 -0
- matterbak-0.4.0/setup.cfg +4 -0
- matterbak-0.4.0/src/matterbak/__init__.py +6 -0
- matterbak-0.4.0/src/matterbak/channeldata.py +148 -0
- matterbak-0.4.0/src/matterbak/dump.py +129 -0
- matterbak-0.4.0/src/matterbak/hashablematterdata.py +16 -0
- matterbak-0.4.0/src/matterbak/ignoresignals.py +106 -0
- matterbak-0.4.0/src/matterbak/matterbak.py +430 -0
- matterbak-0.4.0/src/matterbak/mattermerge.py +88 -0
- matterbak-0.4.0/src/matterbak/mattermostapi.py +147 -0
- matterbak-0.4.0/src/matterbak/teams.py +69 -0
- matterbak-0.4.0/src/matterbak/users.py +158 -0
- matterbak-0.4.0/src/matterbak.egg-info/PKG-INFO +307 -0
- matterbak-0.4.0/src/matterbak.egg-info/SOURCES.txt +19 -0
- matterbak-0.4.0/src/matterbak.egg-info/dependency_links.txt +1 -0
- matterbak-0.4.0/src/matterbak.egg-info/entry_points.txt +2 -0
- matterbak-0.4.0/src/matterbak.egg-info/requires.txt +1 -0
- matterbak-0.4.0/src/matterbak.egg-info/top_level.txt +2 -0
matterbak-0.4.0/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2023, 2026 Michael Behrisch, Björn Hendriks
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
matterbak-0.4.0/PKG-INFO
ADDED
|
@@ -0,0 +1,307 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: matterbak
|
|
3
|
+
Version: 0.4.0
|
|
4
|
+
Summary: Backing up mattermost channels (including files) and users
|
|
5
|
+
Author-email: Michael Behrisch <oss@behrisch.de>, Björn Hendriks <bjoern.hendriks@dlr.de>, Daniel Mohr <daniel.mohr@dlr.de>
|
|
6
|
+
Maintainer-email: Björn Hendriks <bjoern.hendriks@dlr.de>
|
|
7
|
+
License-Expression: MIT
|
|
8
|
+
Project-URL: Repository, https://github.com/bjhend/matterbak
|
|
9
|
+
Project-URL: Issues, https://github.com/bjhend/matterbak/issues
|
|
10
|
+
Project-URL: Changelog, https://github.com/bjhend/matterbak/blob/main/CHANGELOG.md
|
|
11
|
+
Keywords: mattermost,backup,tool,chat
|
|
12
|
+
Classifier: Programming Language :: Python :: 3
|
|
13
|
+
Classifier: Environment :: Console
|
|
14
|
+
Classifier: Topic :: Communications :: Chat
|
|
15
|
+
Classifier: Topic :: System :: Archiving :: Backup
|
|
16
|
+
Classifier: Topic :: Utilities
|
|
17
|
+
Requires-Python: >=3.8
|
|
18
|
+
Description-Content-Type: text/markdown
|
|
19
|
+
License-File: LICENSE
|
|
20
|
+
Requires-Dist: mattermost==10.11.*
|
|
21
|
+
Dynamic: license-file
|
|
22
|
+
|
|
23
|
+
# matterbak
|
|
24
|
+
|
|
25
|
+
Back up Mattermost channels of any type with all
|
|
26
|
+
posts, threads, files, users, emojis.
|
|
27
|
+
|
|
28
|
+
A note on **personal data**: This script can download personal data of the
|
|
29
|
+
users like name, nickname, e-mail-address, picture, etc. This may cause legal
|
|
30
|
+
problems. To avoid that, call the script with option `--skip-users`. You will
|
|
31
|
+
still find user IDs in the data and which roles/permissions belong to them as
|
|
32
|
+
channel members but no personal data about the users behind those IDs. The
|
|
33
|
+
only exceptions are direct and group channels in the backup, which contain the
|
|
34
|
+
usernames in the filenames.
|
|
35
|
+
|
|
36
|
+
Mattermost knows several types of channels:
|
|
37
|
+
|
|
38
|
+
* Direct channels contain a chat with a single user outside of a team
|
|
39
|
+
* Group channels contain a chat with multiple users outside of a team
|
|
40
|
+
* Channels belonging to a team
|
|
41
|
+
|
|
42
|
+
You can configure for each type which channels should be backed up.
|
|
43
|
+
|
|
44
|
+
Subsequent runs of the script with the same data dir will update the saved
|
|
45
|
+
data. So you can run it once to create an initial backup and later update that
|
|
46
|
+
backup by running it again. If you later add more channels to the configuration
|
|
47
|
+
those will be downloaded as well. In case you have accidentally deleted part of
|
|
48
|
+
the files of a channel, delete all channel files and update again. Otherwise
|
|
49
|
+
updates may get broken.
|
|
50
|
+
|
|
51
|
+
We implement safe interruption handling: Writes of images and JSON files are
|
|
52
|
+
protected against partial writes due to interruptions (`Ctrl+C` (SIGINT) and
|
|
53
|
+
`kill` (SIGTERM) signals). The program delays interruptions while writing to
|
|
54
|
+
prevent data corruption. After writing finishes, normal interruption behavior
|
|
55
|
+
resumes and delayed interruptions are called -- you can safely stop the
|
|
56
|
+
program with `Ctrl+C` or `kill` at any time.
|
|
57
|
+
|
|
58
|
+
**Attention**: Updating will skip any changes to older posts unless you give
|
|
59
|
+
option `--update-old-posts`.
|
|
60
|
+
|
|
61
|
+
## Requirements
|
|
62
|
+
|
|
63
|
+
The script should work with Python 3.8 or later.
|
|
64
|
+
The [mattermost module](https://github.com/someone-somenet-org/mattermost-python-api)
|
|
65
|
+
is needed for easier API access.
|
|
66
|
+
|
|
67
|
+
## Installation
|
|
68
|
+
|
|
69
|
+
Matterbak is available as package from PyPI, so you can install it with `pip` or
|
|
70
|
+
`pipx`:
|
|
71
|
+
|
|
72
|
+
```sh
|
|
73
|
+
# Install from PyPI
|
|
74
|
+
pipx install matterbak
|
|
75
|
+
|
|
76
|
+
# Test run the tool
|
|
77
|
+
matterbak --version
|
|
78
|
+
matterbak --help
|
|
79
|
+
```
|
|
80
|
+
|
|
81
|
+
We recommend [`pipx`](https://pipx.pypa.io), because matterbak is an executable
|
|
82
|
+
script.
|
|
83
|
+
|
|
84
|
+
## Configuration
|
|
85
|
+
|
|
86
|
+
### Credentials
|
|
87
|
+
|
|
88
|
+
You will need a json config (default name `credentials.json`) with the
|
|
89
|
+
following format
|
|
90
|
+
|
|
91
|
+
```json
|
|
92
|
+
{
|
|
93
|
+
"user": "my_name",
|
|
94
|
+
"password": "super_secret_pass",
|
|
95
|
+
"url": "https://mattermost.server.org/api"
|
|
96
|
+
}
|
|
97
|
+
```
|
|
98
|
+
|
|
99
|
+
Username is the name you have in Mattermost. You can find it by clicking on
|
|
100
|
+
your avatar in the top right corner
|
|
101
|
+
as the name after the `@` sign. Do not include the `@`.
|
|
102
|
+
|
|
103
|
+
If you login via GitLab or a comparable service, replace `password` with `token`
|
|
104
|
+
and enter the *MMAUTHTOKEN* here. To retrieve it, login via your browser and
|
|
105
|
+
inspect the cookies for *MMAUTHTOKEN*
|
|
106
|
+
|
|
107
|
+
1. Open DevTools (F12)
|
|
108
|
+
2. Go to Application (Chrome/Edge) or Storage (Firefox)
|
|
109
|
+
3. Navigate to Cookies, look at your Mattermost domain
|
|
110
|
+
|
|
111
|
+
This token will expire and change every time you logout.
|
|
112
|
+
|
|
113
|
+
### Channels
|
|
114
|
+
|
|
115
|
+
The channels to back up are configured in another JSON file
|
|
116
|
+
(default: `channels.json`). It has the following format:
|
|
117
|
+
|
|
118
|
+
```json
|
|
119
|
+
{
|
|
120
|
+
"teams":
|
|
121
|
+
{
|
|
122
|
+
"team1":
|
|
123
|
+
{
|
|
124
|
+
"include": [ "channel1", "channel2", "channel3" ],
|
|
125
|
+
"exclude": [ "channel3" ]
|
|
126
|
+
},
|
|
127
|
+
|
|
128
|
+
"team2":
|
|
129
|
+
{
|
|
130
|
+
},
|
|
131
|
+
|
|
132
|
+
"team3":
|
|
133
|
+
{
|
|
134
|
+
"exclude": [ "channel4" ]
|
|
135
|
+
}
|
|
136
|
+
},
|
|
137
|
+
|
|
138
|
+
"direct":
|
|
139
|
+
[
|
|
140
|
+
"user1",
|
|
141
|
+
"user2"
|
|
142
|
+
],
|
|
143
|
+
|
|
144
|
+
"groups":
|
|
145
|
+
{
|
|
146
|
+
"exact":
|
|
147
|
+
[
|
|
148
|
+
[ "user3", "user4" ]
|
|
149
|
+
],
|
|
150
|
+
|
|
151
|
+
"subset":
|
|
152
|
+
[
|
|
153
|
+
[ "user3", "user5" ]
|
|
154
|
+
]
|
|
155
|
+
}
|
|
156
|
+
}
|
|
157
|
+
```
|
|
158
|
+
|
|
159
|
+
The file has three main keys `teams`, `direct`, and `groups` related to the
|
|
160
|
+
channel types explained above.
|
|
161
|
+
|
|
162
|
+
`teams` contains a mapping of team names to dicts with the optional keys
|
|
163
|
+
`include` and `exclude`, which contain a list of channel names each. First, all
|
|
164
|
+
channels from the `include` list are put on the backup list. If the list is
|
|
165
|
+
empty or `include` is not given at all, all channels of the team are put on the
|
|
166
|
+
list. Then, if `exclude` is present, all channels of the `exclude` list are
|
|
167
|
+
removed from the backup list.
|
|
168
|
+
|
|
169
|
+
In this example, `channel1` and `channel2` of `team1` are backed up. `channel3`
|
|
170
|
+
will be excluded, because exclusion has priority. In addition, all channels of
|
|
171
|
+
`team2` are backed up as well as finally all channels except `channel4` from
|
|
172
|
+
`team3`.
|
|
173
|
+
|
|
174
|
+
`direct` contains a list of user names. Direct chats with these users are
|
|
175
|
+
backed up.
|
|
176
|
+
|
|
177
|
+
`groups` may contain two subkeys `exact` and `subset`. Both can contain a list
|
|
178
|
+
with lists of user names. A list of user names under `exact` selects a group if
|
|
179
|
+
it has exactly these members besides the user given in the credentials. A list
|
|
180
|
+
of user names under `subset` selects a group if the configured names are a
|
|
181
|
+
subset of the members of a group.
|
|
182
|
+
|
|
183
|
+
In this example the group with exactly `user3`, `user4`, and the credentials
|
|
184
|
+
user is selected and all groups with at least `user3` and `user5` and
|
|
185
|
+
possibly other users, provided that such groups exist.
|
|
186
|
+
|
|
187
|
+
### Command line
|
|
188
|
+
|
|
189
|
+
Execute matterbak with option `--help` to see the actual command line options:
|
|
190
|
+
|
|
191
|
+
```txt
|
|
192
|
+
usage: matterbak [-h] [--credentials CREDENTIALS] [--channels CHANNELS]
|
|
193
|
+
[-d DATA_DIR] [-o OUTPUT_ZIP] [--update-old-posts]
|
|
194
|
+
[--skip-direct] [--skip-groups] [--skip-teams] [--skip-users]
|
|
195
|
+
[--skip-user-images] [--skip-emojis]
|
|
196
|
+
[--rate-limit RATE_LIMIT] [--initial-jitter INITIAL_JITTER]
|
|
197
|
+
[--step-jitter STEP_JITTER] [--version]
|
|
198
|
+
|
|
199
|
+
options:
|
|
200
|
+
-h, --help show this help message and exit
|
|
201
|
+
--credentials CREDENTIALS
|
|
202
|
+
json file containing user name, password and server
|
|
203
|
+
URL, default = credentials.json
|
|
204
|
+
--channels CHANNELS json file listing all channels to backup, default =
|
|
205
|
+
channels.json
|
|
206
|
+
-d DATA_DIR, --data-dir DATA_DIR
|
|
207
|
+
Dir to store downloaded data in, absolute or relative
|
|
208
|
+
to current dir, default = data
|
|
209
|
+
-o OUTPUT_ZIP, --output-zip OUTPUT_ZIP
|
|
210
|
+
zip file to write, default is 'matterbak_<user>.zip'
|
|
211
|
+
--update-old-posts Update also old posts in case they changed since last
|
|
212
|
+
update
|
|
213
|
+
--skip-direct skip direct channels
|
|
214
|
+
--skip-groups skip group channels
|
|
215
|
+
--skip-teams skip team channels
|
|
216
|
+
--skip-users Skip storing personal user data (includes --skip-user-
|
|
217
|
+
images)
|
|
218
|
+
--skip-user-images Skip storing user images
|
|
219
|
+
--skip-emojis Skip storing custom emojis
|
|
220
|
+
--rate-limit RATE_LIMIT
|
|
221
|
+
Max API calls per second. Default: 10. Set to 0 to
|
|
222
|
+
disable.
|
|
223
|
+
--initial-jitter INITIAL_JITTER
|
|
224
|
+
Random delay in seconds at script start. Default: 0.
|
|
225
|
+
--step-jitter STEP_JITTER
|
|
226
|
+
--initial-jitter INITIAL_JITTER
|
|
227
|
+
Random delay in seconds at script start. Default: 0.
|
|
228
|
+
--step-jitter STEP_JITTER
|
|
229
|
+
Random delay in seconds between each backup unit.
|
|
230
|
+
Default: 0.
|
|
231
|
+
--version show program's version number and exit
|
|
232
|
+
```
|
|
233
|
+
|
|
234
|
+
The script creates folders under the given `--data-dir` for the respective
|
|
235
|
+
types of data and updates their content on subsequent runs. Finally all data
|
|
236
|
+
in the *data-dir* is stored in the `--output-zip` file.
|
|
237
|
+
|
|
238
|
+
The skip options avoid downloading the respective data. This may save time if
|
|
239
|
+
you do not need such data or know that there are no new data of that type on
|
|
240
|
+
update. You may rerun without the skip option at any time to download that
|
|
241
|
+
data as well. So we recommend skipping all types of data except one on the first
|
|
242
|
+
try.
|
|
243
|
+
|
|
244
|
+
The rate limit and jitter options limit the download rate to avoid overloading
|
|
245
|
+
the server. Set to sensible values to be fair to the server operator and other
|
|
246
|
+
users.
|
|
247
|
+
|
|
248
|
+
## Data
|
|
249
|
+
|
|
250
|
+
Every Mattermost object has a unique ID. This is always part of the name of the
|
|
251
|
+
respective file. So if one data file references other data by ID you may easily
|
|
252
|
+
find the referenced data.
|
|
253
|
+
|
|
254
|
+
Some files contain binary data. Those are files that were attached to a post as
|
|
255
|
+
well as images (user images, team icons, custom emojis). These files contain
|
|
256
|
+
the ID of the object they belong to in their names.
|
|
257
|
+
|
|
258
|
+
Team and channel members are stored in a special file, containing a list with
|
|
259
|
+
all team/channel member objects.
|
|
260
|
+
|
|
261
|
+
A special case are threads. Threads are not Mattermost objects. To store the
|
|
262
|
+
threads, any channel data directory contains a threads file with a mapping of
|
|
263
|
+
post IDs. It maps the ID of the root post of a thread to a list of IDs of the
|
|
264
|
+
answer posts. Don't tamper with this file. It will be updated on each run,
|
|
265
|
+
which may fail if the file is corrupted.
|
|
266
|
+
|
|
267
|
+
## Development install
|
|
268
|
+
|
|
269
|
+
Clone the [GitHub repo](https://github.com/bjhend/matterbak):
|
|
270
|
+
|
|
271
|
+
```sh
|
|
272
|
+
git clone https://github.com/bjhend/matterbak.git
|
|
273
|
+
cd matterbak
|
|
274
|
+
```
|
|
275
|
+
|
|
276
|
+
### Run from local repo
|
|
277
|
+
|
|
278
|
+
We recommend using `uv` or `poetry` to run matterbak. Otherwise, install the
|
|
279
|
+
dependencies defined in `pyproject.toml` manually, see section
|
|
280
|
+
[Requirements](#requirements) above.
|
|
281
|
+
|
|
282
|
+
If you are already using `uv` or `poetry` for other projects:
|
|
283
|
+
|
|
284
|
+
```sh
|
|
285
|
+
# With uv
|
|
286
|
+
uv run matterbak
|
|
287
|
+
|
|
288
|
+
# With poetry
|
|
289
|
+
poetry run matterbak
|
|
290
|
+
```
|
|
291
|
+
|
|
292
|
+
Alternatively make an editable install of the local repo:
|
|
293
|
+
|
|
294
|
+
```sh
|
|
295
|
+
pip install -e .
|
|
296
|
+
```
|
|
297
|
+
|
|
298
|
+
## Notes
|
|
299
|
+
|
|
300
|
+
There is also another more complete [Python implementation of the mattermost API](https://github.com/Vaelor/python-mattermost-driver)
|
|
301
|
+
but it needs more configuration.
|
|
302
|
+
|
|
303
|
+
The [official API docs](https://api.mattermost.com/) are also available.
|
|
304
|
+
|
|
305
|
+
## License
|
|
306
|
+
|
|
307
|
+
MIT
|
|
@@ -0,0 +1,285 @@
|
|
|
1
|
+
# matterbak
|
|
2
|
+
|
|
3
|
+
Back up Mattermost channels of any type with all
|
|
4
|
+
posts, threads, files, users, emojis.
|
|
5
|
+
|
|
6
|
+
A note on **personal data**: This script can download personal data of the
|
|
7
|
+
users like name, nickname, e-mail-address, picture, etc. This may cause legal
|
|
8
|
+
problems. To avoid that, call the script with option `--skip-users`. You will
|
|
9
|
+
still find user IDs in the data and which roles/permissions belong to them as
|
|
10
|
+
channel members but no personal data about the users behind those IDs. The
|
|
11
|
+
only exceptions are direct and group channels in the backup, which contain the
|
|
12
|
+
usernames in the filenames.
|
|
13
|
+
|
|
14
|
+
Mattermost knows several types of channels:
|
|
15
|
+
|
|
16
|
+
* Direct channels contain a chat with a single user outside of a team
|
|
17
|
+
* Group channels contain a chat with multiple users outside of a team
|
|
18
|
+
* Channels belonging to a team
|
|
19
|
+
|
|
20
|
+
You can configure for each type which channels should be backed up.
|
|
21
|
+
|
|
22
|
+
Subsequent runs of the script with the same data dir will update the saved
|
|
23
|
+
data. So you can run it once to create an initial backup and later update that
|
|
24
|
+
backup by running it again. If you later add more channels to the configuration
|
|
25
|
+
those will be downloaded as well. In case you have accidentally deleted part of
|
|
26
|
+
the files of a channel, delete all channel files and update again. Otherwise
|
|
27
|
+
updates may get broken.
|
|
28
|
+
|
|
29
|
+
We implement safe interruption handling: Writes of images and JSON files are
|
|
30
|
+
protected against partial writes due to interruptions (`Ctrl+C` (SIGINT) and
|
|
31
|
+
`kill` (SIGTERM) signals). The program delays interruptions while writing to
|
|
32
|
+
prevent data corruption. After writing finishes, normal interruption behavior
|
|
33
|
+
resumes and delayed interruptions are called -- you can safely stop the
|
|
34
|
+
program with `Ctrl+C` or `kill` at any time.
|
|
35
|
+
|
|
36
|
+
**Attention**: Updating will skip any changes to older posts unless you give
|
|
37
|
+
option `--update-old-posts`.
|
|
38
|
+
|
|
39
|
+
## Requirements
|
|
40
|
+
|
|
41
|
+
The script should work with Python 3.8 or later.
|
|
42
|
+
The [mattermost module](https://github.com/someone-somenet-org/mattermost-python-api)
|
|
43
|
+
is needed for easier API access.
|
|
44
|
+
|
|
45
|
+
## Installation
|
|
46
|
+
|
|
47
|
+
Matterbak is available as package from PyPI, so you can install it with `pip` or
|
|
48
|
+
`pipx`:
|
|
49
|
+
|
|
50
|
+
```sh
|
|
51
|
+
# Install from PyPI
|
|
52
|
+
pipx install matterbak
|
|
53
|
+
|
|
54
|
+
# Test run the tool
|
|
55
|
+
matterbak --version
|
|
56
|
+
matterbak --help
|
|
57
|
+
```
|
|
58
|
+
|
|
59
|
+
We recommend [`pipx`](https://pipx.pypa.io), because matterbak is an executable
|
|
60
|
+
script.
|
|
61
|
+
|
|
62
|
+
## Configuration
|
|
63
|
+
|
|
64
|
+
### Credentials
|
|
65
|
+
|
|
66
|
+
You will need a json config (default name `credentials.json`) with the
|
|
67
|
+
following format
|
|
68
|
+
|
|
69
|
+
```json
|
|
70
|
+
{
|
|
71
|
+
"user": "my_name",
|
|
72
|
+
"password": "super_secret_pass",
|
|
73
|
+
"url": "https://mattermost.server.org/api"
|
|
74
|
+
}
|
|
75
|
+
```
|
|
76
|
+
|
|
77
|
+
Username is the name you have in Mattermost. You can find it by clicking on
|
|
78
|
+
your avatar in the top right corner
|
|
79
|
+
as the name after the `@` sign. Do not include the `@`.
|
|
80
|
+
|
|
81
|
+
If you login via GitLab or a comparable service, replace `password` with `token`
|
|
82
|
+
and enter the *MMAUTHTOKEN* here. To retrieve it, login via your browser and
|
|
83
|
+
inspect the cookies for *MMAUTHTOKEN*
|
|
84
|
+
|
|
85
|
+
1. Open DevTools (F12)
|
|
86
|
+
2. Go to Application (Chrome/Edge) or Storage (Firefox)
|
|
87
|
+
3. Navigate to Cookies, look at your Mattermost domain
|
|
88
|
+
|
|
89
|
+
This token will expire and change every time you logout.
|
|
90
|
+
|
|
91
|
+
### Channels
|
|
92
|
+
|
|
93
|
+
The channels to back up are configured in another JSON file
|
|
94
|
+
(default: `channels.json`). It has the following format:
|
|
95
|
+
|
|
96
|
+
```json
|
|
97
|
+
{
|
|
98
|
+
"teams":
|
|
99
|
+
{
|
|
100
|
+
"team1":
|
|
101
|
+
{
|
|
102
|
+
"include": [ "channel1", "channel2", "channel3" ],
|
|
103
|
+
"exclude": [ "channel3" ]
|
|
104
|
+
},
|
|
105
|
+
|
|
106
|
+
"team2":
|
|
107
|
+
{
|
|
108
|
+
},
|
|
109
|
+
|
|
110
|
+
"team3":
|
|
111
|
+
{
|
|
112
|
+
"exclude": [ "channel4" ]
|
|
113
|
+
}
|
|
114
|
+
},
|
|
115
|
+
|
|
116
|
+
"direct":
|
|
117
|
+
[
|
|
118
|
+
"user1",
|
|
119
|
+
"user2"
|
|
120
|
+
],
|
|
121
|
+
|
|
122
|
+
"groups":
|
|
123
|
+
{
|
|
124
|
+
"exact":
|
|
125
|
+
[
|
|
126
|
+
[ "user3", "user4" ]
|
|
127
|
+
],
|
|
128
|
+
|
|
129
|
+
"subset":
|
|
130
|
+
[
|
|
131
|
+
[ "user3", "user5" ]
|
|
132
|
+
]
|
|
133
|
+
}
|
|
134
|
+
}
|
|
135
|
+
```
|
|
136
|
+
|
|
137
|
+
The file has three main keys `teams`, `direct`, and `groups` related to the
|
|
138
|
+
channel types explained above.
|
|
139
|
+
|
|
140
|
+
`teams` contains a mapping of team names to dicts with the optional keys
|
|
141
|
+
`include` and `exclude`, which contain a list of channel names each. First, all
|
|
142
|
+
channels from the `include` list are put on the backup list. If the list is
|
|
143
|
+
empty or `include` is not given at all, all channels of the team are put on the
|
|
144
|
+
list. Then, if `exclude` is present, all channels of the `exclude` list are
|
|
145
|
+
removed from the backup list.
|
|
146
|
+
|
|
147
|
+
In this example, `channel1` and `channel2` of `team1` are backed up. `channel3`
|
|
148
|
+
will be excluded, because exclusion has priority. In addition, all channels of
|
|
149
|
+
`team2` are backed up as well as finally all channels except `channel4` from
|
|
150
|
+
`team3`.
|
|
151
|
+
|
|
152
|
+
`direct` contains a list of user names. Direct chats with these users are
|
|
153
|
+
backed up.
|
|
154
|
+
|
|
155
|
+
`groups` may contain two subkeys `exact` and `subset`. Both can contain a list
|
|
156
|
+
with lists of user names. A list of user names under `exact` selects a group if
|
|
157
|
+
it has exactly these members besides the user given in the credentials. A list
|
|
158
|
+
of user names under `subset` selects a group if the configured names are a
|
|
159
|
+
subset of the members of a group.
|
|
160
|
+
|
|
161
|
+
In this example the group with exactly `user3`, `user4`, and the credentials
|
|
162
|
+
user is selected and all groups with at least `user3` and `user5` and
|
|
163
|
+
possibly other users, provided that such groups exist.
|
|
164
|
+
|
|
165
|
+
### Command line
|
|
166
|
+
|
|
167
|
+
Execute matterbak with option `--help` to see the actual command line options:
|
|
168
|
+
|
|
169
|
+
```txt
|
|
170
|
+
usage: matterbak [-h] [--credentials CREDENTIALS] [--channels CHANNELS]
|
|
171
|
+
[-d DATA_DIR] [-o OUTPUT_ZIP] [--update-old-posts]
|
|
172
|
+
[--skip-direct] [--skip-groups] [--skip-teams] [--skip-users]
|
|
173
|
+
[--skip-user-images] [--skip-emojis]
|
|
174
|
+
[--rate-limit RATE_LIMIT] [--initial-jitter INITIAL_JITTER]
|
|
175
|
+
[--step-jitter STEP_JITTER] [--version]
|
|
176
|
+
|
|
177
|
+
options:
|
|
178
|
+
-h, --help show this help message and exit
|
|
179
|
+
--credentials CREDENTIALS
|
|
180
|
+
json file containing user name, password and server
|
|
181
|
+
URL, default = credentials.json
|
|
182
|
+
--channels CHANNELS json file listing all channels to backup, default =
|
|
183
|
+
channels.json
|
|
184
|
+
-d DATA_DIR, --data-dir DATA_DIR
|
|
185
|
+
Dir to store downloaded data in, absolute or relative
|
|
186
|
+
to current dir, default = data
|
|
187
|
+
-o OUTPUT_ZIP, --output-zip OUTPUT_ZIP
|
|
188
|
+
zip file to write, default is 'matterbak_<user>.zip'
|
|
189
|
+
--update-old-posts Update also old posts in case they changed since last
|
|
190
|
+
update
|
|
191
|
+
--skip-direct skip direct channels
|
|
192
|
+
--skip-groups skip group channels
|
|
193
|
+
--skip-teams skip team channels
|
|
194
|
+
--skip-users Skip storing personal user data (includes --skip-user-
|
|
195
|
+
images)
|
|
196
|
+
--skip-user-images Skip storing user images
|
|
197
|
+
--skip-emojis Skip storing custom emojis
|
|
198
|
+
--rate-limit RATE_LIMIT
|
|
199
|
+
Max API calls per second. Default: 10. Set to 0 to
|
|
200
|
+
disable.
|
|
201
|
+
--initial-jitter INITIAL_JITTER
|
|
202
|
+
Random delay in seconds at script start. Default: 0.
|
|
203
|
+
--step-jitter STEP_JITTER
|
|
204
|
+
--initial-jitter INITIAL_JITTER
|
|
205
|
+
Random delay in seconds at script start. Default: 0.
|
|
206
|
+
--step-jitter STEP_JITTER
|
|
207
|
+
Random delay in seconds between each backup unit.
|
|
208
|
+
Default: 0.
|
|
209
|
+
--version show program's version number and exit
|
|
210
|
+
```
|
|
211
|
+
|
|
212
|
+
The script creates folders under the given `--data-dir` for the respective
|
|
213
|
+
types of data and updates their content on subsequent runs. Finally all data
|
|
214
|
+
in the *data-dir* is stored in the `--output-zip` file.
|
|
215
|
+
|
|
216
|
+
The skip options avoid downloading the respective data. This may save time if
|
|
217
|
+
you do not need such data or know that there are no new data of that type on
|
|
218
|
+
update. You may rerun without the skip option at any time to download that
|
|
219
|
+
data as well. So we recommend skipping all types of data except one on the first
|
|
220
|
+
try.
|
|
221
|
+
|
|
222
|
+
The rate limit and jitter options limit the download rate to avoid overloading
|
|
223
|
+
the server. Set to sensible values to be fair to the server operator and other
|
|
224
|
+
users.
|
|
225
|
+
|
|
226
|
+
## Data
|
|
227
|
+
|
|
228
|
+
Every Mattermost object has a unique ID. This is always part of the name of the
|
|
229
|
+
respective file. So if one data file references other data by ID you may easily
|
|
230
|
+
find the referenced data.
|
|
231
|
+
|
|
232
|
+
Some files contain binary data. Those are files that were attached to a post as
|
|
233
|
+
well as images (user images, team icons, custom emojis). These files contain
|
|
234
|
+
the ID of the object they belong to in their names.
|
|
235
|
+
|
|
236
|
+
Team and channel members are stored in a special file, containing a list with
|
|
237
|
+
all team/channel member objects.
|
|
238
|
+
|
|
239
|
+
A special case are threads. Threads are not Mattermost objects. To store the
|
|
240
|
+
threads, any channel data directory contains a threads file with a mapping of
|
|
241
|
+
post IDs. It maps the ID of the root post of a thread to a list of IDs of the
|
|
242
|
+
answer posts. Don't tamper with this file. It will be updated on each run,
|
|
243
|
+
which may fail if the file is corrupted.
|
|
244
|
+
|
|
245
|
+
## Development install
|
|
246
|
+
|
|
247
|
+
Clone the [GitHub repo](https://github.com/bjhend/matterbak):
|
|
248
|
+
|
|
249
|
+
```sh
|
|
250
|
+
git clone https://github.com/bjhend/matterbak.git
|
|
251
|
+
cd matterbak
|
|
252
|
+
```
|
|
253
|
+
|
|
254
|
+
### Run from local repo
|
|
255
|
+
|
|
256
|
+
We recommend using `uv` or `poetry` to run matterbak. Otherwise, install the
|
|
257
|
+
dependencies defined in `pyproject.toml` manually, see section
|
|
258
|
+
[Requirements](#requirements) above.
|
|
259
|
+
|
|
260
|
+
If you are already using `uv` or `poetry` for other projects:
|
|
261
|
+
|
|
262
|
+
```sh
|
|
263
|
+
# With uv
|
|
264
|
+
uv run matterbak
|
|
265
|
+
|
|
266
|
+
# With poetry
|
|
267
|
+
poetry run matterbak
|
|
268
|
+
```
|
|
269
|
+
|
|
270
|
+
Alternatively make an editable install of the local repo:
|
|
271
|
+
|
|
272
|
+
```sh
|
|
273
|
+
pip install -e .
|
|
274
|
+
```
|
|
275
|
+
|
|
276
|
+
## Notes
|
|
277
|
+
|
|
278
|
+
There is also another more complete [Python implementation of the mattermost API](https://github.com/Vaelor/python-mattermost-driver)
|
|
279
|
+
but it needs more configuration.
|
|
280
|
+
|
|
281
|
+
The [official API docs](https://api.mattermost.com/) are also available.
|
|
282
|
+
|
|
283
|
+
## License
|
|
284
|
+
|
|
285
|
+
MIT
|
|
@@ -0,0 +1,49 @@
|
|
|
1
|
+
[project]
|
|
2
|
+
name = "matterbak"
|
|
3
|
+
description = "Backing up mattermost channels (including files) and users"
|
|
4
|
+
version = "0.4.0"
|
|
5
|
+
requires-python = ">= 3.8"
|
|
6
|
+
dependencies = [
|
|
7
|
+
# We extended the mattermost package so we enforce a specific minor version
|
|
8
|
+
"mattermost==10.11.*",
|
|
9
|
+
]
|
|
10
|
+
readme = "README.md"
|
|
11
|
+
license = "MIT"
|
|
12
|
+
license-files = [ "LICENSE" ]
|
|
13
|
+
authors = [
|
|
14
|
+
{ name="Michael Behrisch", email="oss@behrisch.de" },
|
|
15
|
+
{ name="Björn Hendriks", email="bjoern.hendriks@dlr.de" },
|
|
16
|
+
{ name="Daniel Mohr", email="daniel.mohr@dlr.de" },
|
|
17
|
+
]
|
|
18
|
+
maintainers = [
|
|
19
|
+
{ name="Björn Hendriks", email="bjoern.hendriks@dlr.de" },
|
|
20
|
+
]
|
|
21
|
+
keywords = [ "mattermost", "backup", "tool", "chat" ]
|
|
22
|
+
classifiers = [
|
|
23
|
+
"Programming Language :: Python :: 3",
|
|
24
|
+
"Environment :: Console",
|
|
25
|
+
"Topic :: Communications :: Chat",
|
|
26
|
+
"Topic :: System :: Archiving :: Backup",
|
|
27
|
+
"Topic :: Utilities",
|
|
28
|
+
]
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
[project.urls]
|
|
32
|
+
Repository = "https://github.com/bjhend/matterbak"
|
|
33
|
+
Issues = "https://github.com/bjhend/matterbak/issues"
|
|
34
|
+
Changelog = "https://github.com/bjhend/matterbak/blob/main/CHANGELOG.md"
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
[project.scripts]
|
|
38
|
+
matterbak = "matterbak.matterbak:main"
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
[build-system]
|
|
42
|
+
requires = [ "setuptools>=42" ]
|
|
43
|
+
build-backend = "setuptools.build_meta"
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
[tool.pylint.'MAIN']
|
|
47
|
+
# ignores E0401: Unable to import 'mattermost' (import-error)
|
|
48
|
+
# allows pylint to run in the pre stage of CI pipelines (GitHub Actions)
|
|
49
|
+
ignored-modules = "mattermost"
|