bookstack-file-exporter 0.0.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (29) hide show
  1. bookstack-file-exporter-0.0.1/LICENSE +21 -0
  2. bookstack-file-exporter-0.0.1/PKG-INFO +251 -0
  3. bookstack-file-exporter-0.0.1/README.md +234 -0
  4. bookstack-file-exporter-0.0.1/bookstack_file_exporter/__init__.py +0 -0
  5. bookstack-file-exporter-0.0.1/bookstack_file_exporter/__main__.py +16 -0
  6. bookstack-file-exporter-0.0.1/bookstack_file_exporter/archiver/__init__.py +0 -0
  7. bookstack-file-exporter-0.0.1/bookstack_file_exporter/archiver/archiver.py +125 -0
  8. bookstack-file-exporter-0.0.1/bookstack_file_exporter/archiver/minio_archiver.py +56 -0
  9. bookstack-file-exporter-0.0.1/bookstack_file_exporter/archiver/util.py +43 -0
  10. bookstack-file-exporter-0.0.1/bookstack_file_exporter/common/__init__.py +0 -0
  11. bookstack-file-exporter-0.0.1/bookstack_file_exporter/common/util.py +32 -0
  12. bookstack-file-exporter-0.0.1/bookstack_file_exporter/config_helper/__init__.py +0 -0
  13. bookstack-file-exporter-0.0.1/bookstack_file_exporter/config_helper/config_helper.py +200 -0
  14. bookstack-file-exporter-0.0.1/bookstack_file_exporter/config_helper/models.py +29 -0
  15. bookstack-file-exporter-0.0.1/bookstack_file_exporter/config_helper/remote.py +29 -0
  16. bookstack-file-exporter-0.0.1/bookstack_file_exporter/exporter/__init__.py +0 -0
  17. bookstack-file-exporter-0.0.1/bookstack_file_exporter/exporter/exporter.py +144 -0
  18. bookstack-file-exporter-0.0.1/bookstack_file_exporter/exporter/node.py +79 -0
  19. bookstack-file-exporter-0.0.1/bookstack_file_exporter/exporter/util.py +17 -0
  20. bookstack-file-exporter-0.0.1/bookstack_file_exporter/run.py +53 -0
  21. bookstack-file-exporter-0.0.1/bookstack_file_exporter/run_args.py +36 -0
  22. bookstack-file-exporter-0.0.1/bookstack_file_exporter.egg-info/PKG-INFO +251 -0
  23. bookstack-file-exporter-0.0.1/bookstack_file_exporter.egg-info/SOURCES.txt +28 -0
  24. bookstack-file-exporter-0.0.1/bookstack_file_exporter.egg-info/dependency_links.txt +1 -0
  25. bookstack-file-exporter-0.0.1/bookstack_file_exporter.egg-info/entry_points.txt +2 -0
  26. bookstack-file-exporter-0.0.1/bookstack_file_exporter.egg-info/requires.txt +4 -0
  27. bookstack-file-exporter-0.0.1/bookstack_file_exporter.egg-info/top_level.txt +1 -0
  28. bookstack-file-exporter-0.0.1/pyproject.toml +3 -0
  29. bookstack-file-exporter-0.0.1/setup.cfg +30 -0
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2023 homeylab
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,251 @@
1
+ Metadata-Version: 2.1
2
+ Name: bookstack-file-exporter
3
+ Version: 0.0.1
4
+ Summary: An exporter written in python to export all documents from a bookstack instance in different formats
5
+ Home-page: https://github.com/homeylab/bookstack-file-exporter
6
+ Author: pchang388
7
+ License: MIT License
8
+ Keywords: bookstack,exporter
9
+ Classifier: License :: OSI Approved :: MIT License
10
+ Classifier: Programming Language :: Python :: 3
11
+ Description-Content-Type: text/markdown
12
+ License-File: LICENSE
13
+ Requires-Dist: Pyyaml>=6.0.1
14
+ Requires-Dist: Pydantic>=2.3.0
15
+ Requires-Dist: requests>=2.31.0
16
+ Requires-Dist: minio>=7.1.16
17
+
18
+ # bookstack-file-exporter
19
+
20
+ _This project is still under active development. Functionality is there and is relatively stable at this time._
21
+
22
+ This tool provides a way to export Bookstack pages in a folder-tree layout locally with an option to push to remote object storage locations.
23
+
24
+ This small project was mainly created to run as a cron job in k8s but works anywhere. This would allow me to export my docs in markdown, or other formats like pdf. I use Bookstack's markdown editor as default instead of WYSIWYG editor and this makes my notes portable anywhere even if offline.
25
+
26
+ The main use case is to backup all docs in a folder-tree format to cover the scenarios:
27
+
28
+ 1. Offline copy wanted.
29
+ 2. Back up at a file level as an accessory or alternative to disk and volume backups.
30
+ 3. Share docs with another person to keep locally.
31
+ 4. Migrate to Markdown documenting for simplicity.
32
+ 5. Provide an easy way to do automated file backups locally, in docker, or kubernetes.
33
+
34
+ Supported backup formats are
35
+
36
+ 1. local
37
+ 2. minio
38
+ 3. s3 (Not Yet Implemented)
39
+
40
+ Backups are exported in `.tgz` format and generated based off timestamp. Export names will be in the format: `%Y-%m-%d_%H-%M-%S` (Year-Month-Day_Hour-Minute-Second). *Files are first pulled locally to create the tarball and then can be sent to object storage if needed*. Example file name: `bookstack_export_2023-09-22_07-19-54.tgz`.
41
+
42
+ This script can be run directly via cli as a pip module.
43
+ ```
44
+ # if you already have python bin directory in your path
45
+ bookstack-file-exporter -c <path_to_config_file>
46
+
47
+ # using pip
48
+ python -m bookstack_file_exporter -c <path_to_config_file>
49
+ ```
50
+
51
+ ## Using This Application
52
+
53
+ ### Run via Pip
54
+ Note: This application is tested and developed on Python `3.11.X`. It will probably work on Python >= `3.8`, but it is recommended to install (or set up a venv with) a `3.11.X` version.
55
+
56
+ ```bash
57
+ python -m pip install bookstack-file-exporter
58
+
59
+ # if you already have python bin directory in your path
60
+ bookstack-file-exporter -c <path_to_config_file>
61
+
62
+ # using pip
63
+ python -m bookstack_file_exporter -c <path_to_config_file>
64
+ ```
65
+ Command line options:
66
+ | option | required | description |
67
+ | ------ | -------- | ----------- |
68
+ |`-c`, `--config-file`|True|Relative or Absolute path to a valid configuration file. This configuration file is checked against a schema for validation.|
69
+ |`-v`, `--log-level` |False, default: info|Provide a valid log level: info, debug, warning, error.|
70
+
71
+ ### Run Via Docker
72
+ Example
73
+ ```bash
74
+ docker run \
75
+ --user ${USER_ID}:${USER_GID} \
76
+ -v $(pwd)/local/config.yml:/export/config/config.yml:ro \
77
+ -v $(pwd)/bkps:/export/dump \
78
+ bookstack-file-exporter:0.0.1
79
+ ```
80
+ Required Options:
81
+ | option | description |
82
+ | `config.yml` file mount | Provide a valid configuration file. Specified in example as read only: `-v ${CURDIR}/local/config.yml:/export/config/config.yml:ro`, `${USER_LOCAL_PATH}:${STATIC_DOCKER_PATH}` |
83
+ | `dump` file mount | Directory to place exports. Specified in example: `-v ${CURDIR}/bkps:/export/dump`, `${USER_LOCAL_PATH}:${STATIC_DOCKER_PATH}` |
84
+
85
+ Tokens and other options can be specified, example:
86
+ ```bash
87
+ # '-e' flag for env vars
88
+ # --user flag to override the uid/gid for created files
89
+ docker run \
90
+ -e LOG_LEVEL='debug' \
91
+ -e BOOKSTACK_TOKEN_ID='xyz' \
92
+ -e BOOKSTACK_TOKEN_SECRET='xyz' \
93
+ --user 1000:1000 \
94
+ -v $(pwd)/local/config.yml:/export/config/config.yml:ro \
95
+ -v $(pwd):/export/dump \
96
+ bookstack-file-exporter:0.0.1
97
+ ```
98
+
99
+ ### Authentication
100
+ **Note visibility of pages is based on user**, so use a user that has access to pages you want to back up
101
+
102
+ Ref: [https://demo.bookstackapp.com/api/docs#authentication](https://demo.bookstackapp.com/api/docs#authentication)
103
+
104
+ Provide a tokenId and a tokenSecret as environment variables or directly in the configuration file.
105
+ - `BOOKSTACK_TOKEN_ID`
106
+ - `BOOKSTACK_TOKEN_SECRET`
107
+
108
+ For object storage authentication, find the relevant sections further down in this document.
109
+
110
+ ### Configuration file
111
+ See below for an example and explanation. Optionally, look at `examples/` folder for more.
112
+
113
+ Schema and values are checked so ensure proper settings are provided.
114
+ ```
115
+ # if http/https not specified, defaults to https
116
+ # if you put http here, it will try verify=false, to not check certs
117
+ host: "https://bookstack.yourdomain.com"
118
+
119
+ # You could optionally set the bookstack token_id and token_secret here instead of env
120
+ # If env variable is also supplied, env variable will take precedence
121
+ credentials:
122
+ token_id: ""
123
+ token_secret: ""
124
+
125
+ # additional headers to add, examples below
126
+ additional_headers:
127
+ test: "test"
128
+ test2: "test2"
129
+ User-Agent: "test-agent"
130
+
131
+ # supported formats from bookstack below
132
+ # valid formats: markdown, html, pdf, plaintext
133
+ # you can specify one or as many as you'd like
134
+ formats:
135
+ - markdown
136
+ - html
137
+ - pdf
138
+ - plaintext
139
+
140
+ # optional minio configuration
141
+ # If not required, you should omit/comment out the section
142
+ # You can specify env vars instead for access and secret key
143
+ # See Minio Backups section of this doc for more info on required fields
144
+ minio_config:
145
+ host: "minio.yourdomain.com"
146
+ access_key: ""
147
+ secret_key: ""
148
+ region: "us-east-1"
149
+ bucket: "mybucket"
150
+ path: "bookstack/backups"
151
+
152
+ # output directory for the exported archive
153
+ # relative or full path
154
+ # User who runs the command should have access to write and create sub folders in this directory
155
+ # optional, if not provided, will use current run directory by default
156
+ output_path: "bkps/"
157
+
158
+ # optional export of metadata about the page in a json file
159
+ # this metadata contains general information about the page
160
+ # like: last update, owner, revision count, etc.
161
+ # omit this or set to false if not needed
162
+ export_meta: true
163
+
164
+ # optional if using object storage targets
165
+ # After uploading to object storage targets, choose to clean up local files
166
+ # delete the archive from local filesystem
167
+ # will not be cleaned up if set to false or omitted
168
+ clean_up: true
169
+ ```
170
+
171
+ ### Backup Behavior
172
+ We will use slug names (from Bookstack API) by default, as such certain characters like `!`, `/` will be ignored and spaces replaced.
173
+
174
+ All sub directories will be created as required during the export process.
175
+
176
+ ```
177
+ Shelves --> Books --> Chapters --> Pages
178
+
179
+ ## Example
180
+ kafka
181
+ ---> controller
182
+ ---> settings
183
+ ---> logs (chapter)
184
+ ---> retention.md
185
+ ---> compression.pdf
186
+ ---> something.html
187
+ ---> other.txt
188
+ ---> optional
189
+ ---> main
190
+ ---> deploy
191
+ ---> broker
192
+ ---> settings
193
+ ---> deploy
194
+ ---> schema-registry
195
+ ---> protobuf
196
+ ---> settings
197
+ ```
198
+
199
+ Books without a shelf will be put in a shelf folder named `unassigned`.
200
+
201
+ Empty/New Pages will be ignored since they have not been modified yet from creation and are empty but also do not have a valid slug. Example:
202
+ ```
203
+ {
204
+ ...
205
+ "name": "New Page",
206
+ "slug": "",
207
+ ...
208
+ }
209
+ ```
210
+
211
+ You may notice some directories (books) and/or files (pages) in the archive have a random string at the end, example - `nKA`: `user-and-group-management-nKA`. This is expected: a resource with the same name was created in another shelf, and Bookstack adds a string at the end to ensure uniqueness.
212
+
213
+ ### Minio Backups
214
+ When specifying `minio_config` in the configuration file, these fields are required in the file:
215
+ ```
216
+ # a host/ip + port combination is also allowed
217
+ # example: "minio.yourdomain.com:8443"
218
+ host: "minio.yourdomain.com"
219
+
220
+ # this is required since minio api appears to require it
221
+ # set to the region your bucket resides in
222
+ # if unsure, try "us-east-1" first
223
+ region: "us-east-1"
224
+
225
+ # bucket to upload to
226
+ bucket: "mybucket"
227
+ ```
228
+
229
+ These fields are optional:
230
+ ```
231
+ # access key for the minio instance
232
+ # optionally set as env variable instead
233
+ access_key: ""
234
+
235
+ # secret key for the minio instance
236
+ # optionally set as env variable instead
237
+ secret_key: ""
238
+
239
+ # the path of the backup
240
+ # in example below, the exported archive will appear in: `<bucket_name>:/bookstack/backups/bookstack-<timestamp>.tgz`
241
+ path: "bookstack/backups"
242
+ ```
243
+
244
+ As mentioned you can optionally set access and secret key as env variables. If both are specified, env variable will take precedence.
245
+ - `MINIO_ACCESS_KEY`
246
+ - `MINIO_SECRET_KEY`
247
+
248
+ ## Future Items
249
+ 1. Be able to pull media/photos locally and place in their respective page folders for a more complete file level backup.
250
+ 2. Include the exporter in a maintained helm chart as an optional deployment. The helm chart is [here](https://github.com/homeylab/helm-charts/tree/main/charts/bookstack).
251
+ 3. Export to S3 or other storage options.
@@ -0,0 +1,234 @@
1
+ # bookstack-file-exporter
2
+
3
+ _This project is still under active development. Functionality is there and is relatively stable at this time._
4
+
5
+ This tool provides a way to export Bookstack pages in a folder-tree layout locally with an option to push to remote object storage locations.
6
+
7
+ This small project was mainly created to run as a cron job in k8s but works anywhere. This would allow me to export my docs in markdown, or other formats like pdf. I use Bookstack's markdown editor as default instead of WYSIWYG editor and this makes my notes portable anywhere even if offline.
8
+
9
+ The main use case is to backup all docs in a folder-tree format to cover the scenarios:
10
+
11
+ 1. Offline copy wanted.
12
+ 2. Back up at a file level as an accessory or alternative to disk and volume backups.
13
+ 3. Share docs with another person to keep locally.
14
+ 4. Migrate to Markdown documenting for simplicity.
15
+ 5. Provide an easy way to do automated file backups locally, in docker, or kubernetes.
16
+
17
+ Supported backup formats are
18
+
19
+ 1. local
20
+ 2. minio
21
+ 3. s3 (Not Yet Implemented)
22
+
23
+ Backups are exported in `.tgz` format and generated based off timestamp. Export names will be in the format: `%Y-%m-%d_%H-%M-%S` (Year-Month-Day_Hour-Minute-Second). *Files are first pulled locally to create the tarball and then can be sent to object storage if needed*. Example file name: `bookstack_export_2023-09-22_07-19-54.tgz`.
24
+
25
+ This script can be run directly via cli as a pip module.
26
+ ```
27
+ # if you already have python bin directory in your path
28
+ bookstack-file-exporter -c <path_to_config_file>
29
+
30
+ # using pip
31
+ python -m bookstack_file_exporter -c <path_to_config_file>
32
+ ```
33
+
34
+ ## Using This Application
35
+
36
+ ### Run via Pip
37
+ Note: This application is tested and developed on Python `3.11.X`. It will probably work on Python >= `3.8`, but it is recommended to install (or set up a venv with) a `3.11.X` version.
38
+
39
+ ```bash
40
+ python -m pip install bookstack-file-exporter
41
+
42
+ # if you already have python bin directory in your path
43
+ bookstack-file-exporter -c <path_to_config_file>
44
+
45
+ # using pip
46
+ python -m bookstack_file_exporter -c <path_to_config_file>
47
+ ```
48
+ Command line options:
49
+ | option | required | description |
50
+ | ------ | -------- | ----------- |
51
+ |`-c`, `--config-file`|True|Relative or Absolute path to a valid configuration file. This configuration file is checked against a schema for validation.|
52
+ |`-v`, `--log-level` |False, default: info|Provide a valid log level: info, debug, warning, error.|
53
+
54
+ ### Run Via Docker
55
+ Example
56
+ ```bash
57
+ docker run \
58
+ --user ${USER_ID}:${USER_GID} \
59
+ -v $(pwd)/local/config.yml:/export/config/config.yml:ro \
60
+ -v $(pwd)/bkps:/export/dump \
61
+ bookstack-file-exporter:0.0.1
62
+ ```
63
+ Required Options:
64
+ | option | description |
65
+ | `config.yml` file mount | Provide a valid configuration file. Specified in example as read only: `-v ${CURDIR}/local/config.yml:/export/config/config.yml:ro`, `${USER_LOCAL_PATH}:${STATIC_DOCKER_PATH}` |
66
+ | `dump` file mount | Directory to place exports. Specified in example: `-v ${CURDIR}/bkps:/export/dump`, `${USER_LOCAL_PATH}:${STATIC_DOCKER_PATH}` |
67
+
68
+ Tokens and other options can be specified, example:
69
+ ```bash
70
+ # '-e' flag for env vars
71
+ # --user flag to override the uid/gid for created files
72
+ docker run \
73
+ -e LOG_LEVEL='debug' \
74
+ -e BOOKSTACK_TOKEN_ID='xyz' \
75
+ -e BOOKSTACK_TOKEN_SECRET='xyz' \
76
+ --user 1000:1000 \
77
+ -v $(pwd)/local/config.yml:/export/config/config.yml:ro \
78
+ -v $(pwd):/export/dump \
79
+ bookstack-file-exporter:0.0.1
80
+ ```
81
+
82
+ ### Authentication
83
+ **Note visibility of pages is based on user**, so use a user that has access to pages you want to back up
84
+
85
+ Ref: [https://demo.bookstackapp.com/api/docs#authentication](https://demo.bookstackapp.com/api/docs#authentication)
86
+
87
+ Provide a tokenId and a tokenSecret as environment variables or directly in the configuration file.
88
+ - `BOOKSTACK_TOKEN_ID`
89
+ - `BOOKSTACK_TOKEN_SECRET`
90
+
91
+ For object storage authentication, find the relevant sections further down in this document.
92
+
93
+ ### Configuration file
94
+ See below for an example and explanation. Optionally, look at `examples/` folder for more.
95
+
96
+ Schema and values are checked so ensure proper settings are provided.
97
+ ```
98
+ # if http/https not specified, defaults to https
99
+ # if you put http here, it will try verify=false, to not check certs
100
+ host: "https://bookstack.yourdomain.com"
101
+
102
+ # You could optionally set the bookstack token_id and token_secret here instead of env
103
+ # If env variable is also supplied, env variable will take precedence
104
+ credentials:
105
+ token_id: ""
106
+ token_secret: ""
107
+
108
+ # additional headers to add, examples below
109
+ additional_headers:
110
+ test: "test"
111
+ test2: "test2"
112
+ User-Agent: "test-agent"
113
+
114
+ # supported formats from bookstack below
115
+ # valid formats: markdown, html, pdf, plaintext
116
+ # you can specify one or as many as you'd like
117
+ formats:
118
+ - markdown
119
+ - html
120
+ - pdf
121
+ - plaintext
122
+
123
+ # optional minio configuration
124
+ # If not required, you should omit/comment out the section
125
+ # You can specify env vars instead for access and secret key
126
+ # See Minio Backups section of this doc for more info on required fields
127
+ minio_config:
128
+ host: "minio.yourdomain.com"
129
+ access_key: ""
130
+ secret_key: ""
131
+ region: "us-east-1"
132
+ bucket: "mybucket"
133
+ path: "bookstack/backups"
134
+
135
+ # output directory for the exported archive
136
+ # relative or full path
137
+ # User who runs the command should have access to write and create sub folders in this directory
138
+ # optional, if not provided, will use current run directory by default
139
+ output_path: "bkps/"
140
+
141
+ # optional export of metadata about the page in a json file
142
+ # this metadata contains general information about the page
143
+ # like: last update, owner, revision count, etc.
144
+ # omit this or set to false if not needed
145
+ export_meta: true
146
+
147
+ # optional if using object storage targets
148
+ # After uploading to object storage targets, choose to clean up local files
149
+ # delete the archive from local filesystem
150
+ # will not be cleaned up if set to false or omitted
151
+ clean_up: true
152
+ ```
153
+
154
+ ### Backup Behavior
155
+ We will use slug names (from Bookstack API) by default, as such certain characters like `!`, `/` will be ignored and spaces replaced.
156
+
157
+ All sub directories will be created as required during the export process.
158
+
159
+ ```
160
+ Shelves --> Books --> Chapters --> Pages
161
+
162
+ ## Example
163
+ kafka
164
+ ---> controller
165
+ ---> settings
166
+ ---> logs (chapter)
167
+ ---> retention.md
168
+ ---> compression.pdf
169
+ ---> something.html
170
+ ---> other.txt
171
+ ---> optional
172
+ ---> main
173
+ ---> deploy
174
+ ---> broker
175
+ ---> settings
176
+ ---> deploy
177
+ ---> schema-registry
178
+ ---> protobuf
179
+ ---> settings
180
+ ```
181
+
182
+ Books without a shelf will be put in a shelf folder named `unassigned`.
183
+
184
+ Empty/New Pages will be ignored since they have not been modified yet from creation and are empty but also do not have a valid slug. Example:
185
+ ```
186
+ {
187
+ ...
188
+ "name": "New Page",
189
+ "slug": "",
190
+ ...
191
+ }
192
+ ```
193
+
194
+ You may notice some directories (books) and/or files (pages) in the archive have a random string at the end, example - `nKA`: `user-and-group-management-nKA`. This is expected: a resource with the same name was created in another shelf, and Bookstack adds a string at the end to ensure uniqueness.
195
+
196
+ ### Minio Backups
197
+ When specifying `minio_config` in the configuration file, these fields are required in the file:
198
+ ```
199
+ # a host/ip + port combination is also allowed
200
+ # example: "minio.yourdomain.com:8443"
201
+ host: "minio.yourdomain.com"
202
+
203
+ # this is required since minio api appears to require it
204
+ # set to the region your bucket resides in
205
+ # if unsure, try "us-east-1" first
206
+ region: "us-east-1"
207
+
208
+ # bucket to upload to
209
+ bucket: "mybucket"
210
+ ```
211
+
212
+ These fields are optional:
213
+ ```
214
+ # access key for the minio instance
215
+ # optionally set as env variable instead
216
+ access_key: ""
217
+
218
+ # secret key for the minio instance
219
+ # optionally set as env variable instead
220
+ secret_key: ""
221
+
222
+ # the path of the backup
223
+ # in example below, the exported archive will appear in: `<bucket_name>:/bookstack/backups/bookstack-<timestamp>.tgz`
224
+ path: "bookstack/backups"
225
+ ```
226
+
227
+ As mentioned you can optionally set access and secret key as env variables. If both are specified, env variable will take precedence.
228
+ - `MINIO_ACCESS_KEY`
229
+ - `MINIO_SECRET_KEY`
230
+
231
+ ## Future Items
232
+ 1. Be able to pull media/photos locally and place in their respective page folders for a more complete file level backup.
233
+ 2. Include the exporter in a maintained helm chart as an optional deployment. The helm chart is [here](https://github.com/homeylab/helm-charts/tree/main/charts/bookstack).
234
+ 3. Export to S3 or other storage options.
@@ -0,0 +1,16 @@
1
+ import argparse
2
+ import logging
3
+
4
+ from bookstack_file_exporter import run
5
+ from bookstack_file_exporter import run_args
6
+
7
def main():
    """Entry point: fetch CLI args, set up logging, then run the exporter."""
    cli_args: argparse.Namespace = run_args.get_args()
    # resolve the level first so the logging setup call stays readable
    level = run_args.get_log_level(cli_args.log_level)
    logging.basicConfig(
        format='%(asctime)s [%(levelname)s] %(message)s',
        level=level,
        datefmt='%Y-%m-%d %H:%M:%S',
    )
    run.exporter(cli_args)


if __name__ == '__main__':
    main()
@@ -0,0 +1,125 @@
1
+ from typing import List, Dict, Union
2
+ from datetime import datetime
3
+ import logging
4
+
5
+ from bookstack_file_exporter.exporter.node import Node
6
+ from bookstack_file_exporter.archiver import util
7
+ from bookstack_file_exporter.archiver.minio_archiver import MinioArchiver
8
+ from bookstack_file_exporter.config_helper.remote import StorageProviderConfig
9
+
10
# module-level logger
log = logging.getLogger(__name__)

# suffixes appended to generated file names
_META_FILE_SUFFIX = "_meta.json"
_TAR_SUFFIX = ".tar"
_TAR_GZ_SUFFIX = ".tgz"

# path segment used when building a page export url
_EXPORT_API_PATH = "export"

# lookup of file suffix by export format / artifact kind
_FILE_EXTENSION_MAP = dict(
    markdown=".md",
    html=".html",
    pdf=".pdf",
    plaintext=".txt",
    meta=_META_FILE_SUFFIX,
    tar=_TAR_SUFFIX,
    tgz=_TAR_GZ_SUFFIX,
)

# strftime pattern used to timestamp export names
_DATE_STR_FORMAT = "%Y-%m-%d_%H-%M-%S"
29
+
30
class Archiver:
    """
    Archiver pulls all the necessary files from upstream
    and then pushes them to the specified backup location(s)

    Args:
        :base_dir: str (required) = the base path for the export; a
        timestamp is appended to it to name the .tar/.tgz files.
        :add_meta: bool (required) = whether or not to add
        a metadata json file for each archived page.
        :base_page_url: str (required) = the full url and path to get page content.
        :headers: Dict[str, str] (required) = the headers sent with each export request.

    Returns:
        Archiver instance with attributes that are
        accessible for use for file level archival and backup.
    """
    def __init__(self, base_dir: str, add_meta: Union[bool, None],
                 base_page_url: str, headers: Dict[str, str]):
        self.base_dir = base_dir
        self.add_meta = add_meta
        self.base_page_url = base_page_url
        self._headers = headers
        self._root_dir = self.generate_root_folder(self.base_dir)
        # the tgz file will be name of
        # parent export directory, <base_dir>_<timestamp>, and .tgz extension
        self._archive_file = f"{self._root_dir}{_FILE_EXTENSION_MAP['tgz']}"
        # name of intermediate tar file before gzip
        self._tar_file = f"{self._root_dir}{_FILE_EXTENSION_MAP['tar']}"
        # name of the base folder to use within the tgz archive
        self._archive_base_path = self._root_dir.split("/")[-1]
        # remote_system to function mapping
        self._remote_exports = {'minio': self._archive_minio, 's3': self._archive_s3}

    # create local tarball first
    def archive(self, page_nodes: Dict[int, Node], export_formats: List[str]):
        """create a .tgz of all page content"""
        for page in page_nodes.values():
            for index, ex_format in enumerate(export_formats):
                # metadata belongs to the page, not to a format: write it
                # only alongside the first format so the same meta file
                # is not written again for every additional format
                self._gather(page, ex_format, write_meta=(index == 0))
        self._gzip_tar()

    # convert to bytes to be agnostic to end destination (future use case?)
    def _gather(self, page_node: Node, export_format: str, write_meta: bool = True):
        """fetch one page in one format and write it to the local tar"""
        raw_data = self._get_data_format(page_node.id_, export_format)
        self._gather_local(page_node.file_path, raw_data, export_format,
                           page_node.meta, write_meta=write_meta)

    def _gather_local(self, page_path: str, data: bytes,
                      export_format: str, meta_data: Union[bytes, None],
                      write_meta: bool = True):
        """
        write the page content, and optionally its metadata, to the tar file

        :write_meta: defaults to True so direct callers keep the previous
        behavior; `archive` passes False for all but the first format.
        """
        page_file_name = f"{self._archive_base_path}/" \
            f"{page_path}{_FILE_EXTENSION_MAP[export_format]}"
        util.write_bytes(self._tar_file, file_path=page_file_name, data=data)
        if self.add_meta and write_meta:
            meta_file_name = f"{self._archive_base_path}/{page_path}{_FILE_EXTENSION_MAP['meta']}"
            bytes_meta = util.get_json_bytes(meta_data)
            util.write_bytes(self._tar_file, file_path=meta_file_name, data=bytes_meta)

    # send to remote systems
    def archive_remote(self, remote_targets: Dict[str, StorageProviderConfig]):
        """
        for each target, do their respective tasks

        Raises KeyError if a target name has no handler in the
        remote_system to function mapping.
        """
        if remote_targets:
            for key, value in remote_targets.items():
                self._remote_exports[key](value)

    def _gzip_tar(self):
        util.create_gzip(self._tar_file, self._archive_file)

    def _archive_minio(self, config: StorageProviderConfig):
        minio_archiver = MinioArchiver(config)
        minio_archiver.upload_backup(self._archive_file)

    def _archive_s3(self, config: StorageProviderConfig):
        # not implemented yet: make the skipped upload visible instead of
        # silently doing nothing
        log.warning("s3 archive target is not implemented yet, skipping upload")

    def clean_up(self, clean_up_archive: Union[bool, None]):
        """remove archive after sending to remote target"""
        self._clean(clean_up_archive)

    def _clean(self, clean_up_archive: Union[bool, None]):
        # if user is uploading to object storage
        # delete the local .tgz archive since we have it there already
        if clean_up_archive:
            util.remove_file(self._archive_file)

    # convert page data to bytes
    def _get_data_format(self, page_node_id: int, export_format: str) -> bytes:
        url = self._get_export_url(node_id=page_node_id, export_format=export_format)
        return util.get_byte_response(url=url, headers=self._headers)

    def _get_export_url(self, node_id: int, export_format: str) -> str:
        return f"{self.base_page_url}/{node_id}/{_EXPORT_API_PATH}/{export_format}"

    @staticmethod
    def generate_root_folder(base_folder_name: str) -> str:
        """return base archive name"""
        return base_folder_name + "_" + datetime.now().strftime(_DATE_STR_FORMAT)