bookstack-file-exporter 0.0.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- bookstack_file_exporter/__init__.py +0 -0
- bookstack_file_exporter/__main__.py +16 -0
- bookstack_file_exporter/archiver/__init__.py +0 -0
- bookstack_file_exporter/archiver/archiver.py +125 -0
- bookstack_file_exporter/archiver/minio_archiver.py +56 -0
- bookstack_file_exporter/archiver/util.py +43 -0
- bookstack_file_exporter/common/__init__.py +0 -0
- bookstack_file_exporter/common/util.py +32 -0
- bookstack_file_exporter/config_helper/__init__.py +0 -0
- bookstack_file_exporter/config_helper/config_helper.py +200 -0
- bookstack_file_exporter/config_helper/models.py +29 -0
- bookstack_file_exporter/config_helper/remote.py +29 -0
- bookstack_file_exporter/exporter/__init__.py +0 -0
- bookstack_file_exporter/exporter/exporter.py +144 -0
- bookstack_file_exporter/exporter/node.py +79 -0
- bookstack_file_exporter/exporter/util.py +17 -0
- bookstack_file_exporter/run.py +53 -0
- bookstack_file_exporter/run_args.py +36 -0
- bookstack_file_exporter-0.0.1.dist-info/LICENSE +21 -0
- bookstack_file_exporter-0.0.1.dist-info/METADATA +251 -0
- bookstack_file_exporter-0.0.1.dist-info/RECORD +24 -0
- bookstack_file_exporter-0.0.1.dist-info/WHEEL +5 -0
- bookstack_file_exporter-0.0.1.dist-info/entry_points.txt +2 -0
- bookstack_file_exporter-0.0.1.dist-info/top_level.txt +1 -0
|
File without changes
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
import argparse
|
|
2
|
+
import logging
|
|
3
|
+
|
|
4
|
+
from bookstack_file_exporter import run
|
|
5
|
+
from bookstack_file_exporter import run_args
|
|
6
|
+
|
|
7
|
+
def main():
    """CLI entrypoint: parse arguments, configure logging, run the export."""
    args: argparse.Namespace = run_args.get_args()
    # one shared logging setup for the whole run; level comes from the CLI flag
    log_format = '%(asctime)s [%(levelname)s] %(message)s'
    date_format = '%Y-%m-%d %H:%M:%S'
    logging.basicConfig(format=log_format,
                        level=run_args.get_log_level(args.log_level),
                        datefmt=date_format)
    run.exporter(args)


if __name__ == '__main__':
    main()
|
|
File without changes
|
|
@@ -0,0 +1,125 @@
|
|
|
1
|
+
from typing import List, Dict, Union
|
|
2
|
+
from datetime import datetime
|
|
3
|
+
import logging
|
|
4
|
+
|
|
5
|
+
from bookstack_file_exporter.exporter.node import Node
|
|
6
|
+
from bookstack_file_exporter.archiver import util
|
|
7
|
+
from bookstack_file_exporter.archiver.minio_archiver import MinioArchiver
|
|
8
|
+
from bookstack_file_exporter.config_helper.remote import StorageProviderConfig
|
|
9
|
+
|
|
10
|
+
log = logging.getLogger(__name__)
|
|
11
|
+
|
|
12
|
+
_META_FILE_SUFFIX = "_meta.json"
|
|
13
|
+
_TAR_SUFFIX = ".tar"
|
|
14
|
+
_TAR_GZ_SUFFIX = ".tgz"
|
|
15
|
+
|
|
16
|
+
_EXPORT_API_PATH = "export"
|
|
17
|
+
|
|
18
|
+
_FILE_EXTENSION_MAP = {
|
|
19
|
+
"markdown": ".md",
|
|
20
|
+
"html": ".html",
|
|
21
|
+
"pdf": ".pdf",
|
|
22
|
+
"plaintext": ".txt",
|
|
23
|
+
"meta": _META_FILE_SUFFIX,
|
|
24
|
+
"tar": _TAR_SUFFIX,
|
|
25
|
+
"tgz": _TAR_GZ_SUFFIX
|
|
26
|
+
}
|
|
27
|
+
|
|
28
|
+
_DATE_STR_FORMAT = "%Y-%m-%d_%H-%M-%S"
|
|
29
|
+
|
|
30
|
+
class Archiver:
    """
    Archiver pulls all the necessary files from upstream
    and then pushes them to the specified backup location(s)

    Args:
        :root_dir: str (required) = the base directory for
        which the archive .tgz will be placed.
        :add_meta: bool (required) = whether or not to add
        metadata json files for each page, book, chapter, and/or shelve.
        :base_page_url: str (required) = the full url and path to get page content.
        :headers: Dict[str, str] (required) = the headers which include the Authorization to use

    Returns:
        Archiver instance with attributes that are
        accessible for use for file level archival and backup.
    """
    def __init__(self, base_dir: str, add_meta: Union[bool, None],
            base_page_url: str, headers: Dict[str, str]):
        self.base_dir = base_dir
        self.add_meta = add_meta
        self.base_page_url = base_page_url
        self._headers = headers
        # timestamped export root: <base_dir>_<YYYY-mm-dd_HH-MM-SS>
        self._root_dir = self.generate_root_folder(self.base_dir)
        # the tgz file will be name of
        # parent export directory, bookstack-<timestamp>, and .tgz extension
        self._archive_file = f"{self._root_dir}{_FILE_EXTENSION_MAP['tgz']}"
        # name of intermediate tar file before gzip
        self._tar_file = f"{self._root_dir}{_FILE_EXTENSION_MAP['tar']}"
        # name of the base folder to use within the tgz archive
        self._archive_base_path = self._root_dir.split("/")[-1]
        # remote_system to function mapping
        self._remote_exports = {'minio': self._archive_minio, 's3': self._archive_s3}

    # create local tarball first
    def archive(self, page_nodes: Dict[int, Node], export_formats: List[str]):
        """create a .tgz of all page content"""
        # every page is exported once per requested format, all staged
        # into the same intermediate tar, then gzipped once at the end
        for _, page in page_nodes.items():
            for ex_format in export_formats:
                self._gather(page, ex_format)
        self._gzip_tar()

    # convert to bytes to be agnostic to end destination (future use case?)
    def _gather(self, page_node: Node, export_format: str):
        # download a single page in the given format and stage it locally
        raw_data = self._get_data_format(page_node.id_, export_format)
        self._gather_local(page_node.file_path, raw_data, export_format, page_node.meta)

    def _gather_local(self, page_path: str, data: bytes,
            export_format: str, meta_data: Union[bytes, None]):
        # append the page bytes (and optionally a _meta.json sibling)
        # to the intermediate tar file under the archive base folder
        page_file_name = f"{self._archive_base_path}/" \
            f"{page_path}{_FILE_EXTENSION_MAP[export_format]}"
        util.write_bytes(self._tar_file, file_path=page_file_name, data=data)
        if self.add_meta:
            meta_file_name = f"{self._archive_base_path}/{page_path}{_FILE_EXTENSION_MAP['meta']}"
            bytes_meta = util.get_json_bytes(meta_data)
            util.write_bytes(self._tar_file, file_path=meta_file_name, data=bytes_meta)

    # send to remote systems
    def archive_remote(self, remote_targets: Dict[str, StorageProviderConfig]):
        """for each target, do their respective tasks"""
        # dispatch each configured target through the _remote_exports map
        if remote_targets:
            for key, value in remote_targets.items():
                self._remote_exports[key](value)

    def _gzip_tar(self):
        # compress the staged tar into the final .tgz artifact
        util.create_gzip(self._tar_file, self._archive_file)

    def _archive_minio(self, config: StorageProviderConfig):
        # upload the finished .tgz to the configured minio bucket
        minio_archiver = MinioArchiver(config)
        minio_archiver.upload_backup(self._archive_file)

    def _archive_s3(self, config: StorageProviderConfig):
        # NOTE(review): s3 upload is a placeholder, not implemented yet
        pass

    def clean_up(self, clean_up_archive: Union[bool, None]):
        """remove archive after sending to remote target"""
        self._clean(clean_up_archive)

    def _clean(self, clean_up_archive: Union[bool, None]):
        # if user is uploading to object storage
        # delete the local .tgz archive since we have it there already
        if clean_up_archive:
            util.remove_file(self._archive_file)

    # convert page data to bytes
    def _get_data_format(self, page_node_id: int, export_format: str) -> bytes:
        # GET the export endpoint for this page and return the raw bytes
        url = self._get_export_url(node_id=page_node_id, export_format=export_format)
        return util.get_byte_response(url=url, headers=self._headers)

    def _get_export_url(self, node_id: int, export_format: str) -> str:
        # e.g. <base_page_url>/<id>/export/<format>
        return f"{self.base_page_url}/{node_id}/{_EXPORT_API_PATH}/{export_format}"

    @staticmethod
    def generate_root_folder(base_folder_name: str) -> str:
        """return base archive name"""
        return base_folder_name + "_" + datetime.now().strftime(_DATE_STR_FORMAT)
|
|
@@ -0,0 +1,56 @@
|
|
|
1
|
+
from typing import Union
|
|
2
|
+
import logging
|
|
3
|
+
|
|
4
|
+
from minio import Minio
|
|
5
|
+
|
|
6
|
+
from bookstack_file_exporter.config_helper.remote import StorageProviderConfig
|
|
7
|
+
|
|
8
|
+
log = logging.getLogger(__name__)
|
|
9
|
+
|
|
10
|
+
class MinioArchiver:
    """
    Class to handle minio object upload and validations.

    Args:
        config <StorageProviderConfig> = minio configuration
            (host, access_key, secret_key, region, bucket, optional path)

    Raises:
        ValueError if the configured bucket does not exist.

    Returns:
        MinioArchiver instance for archival use
    """
    def __init__(self, config: StorageProviderConfig):
        self._client = Minio(
            config.host,
            access_key=config.access_key,
            secret_key=config.secret_key,
            region=config.region
        )
        self.bucket = config.bucket
        # normalized object path prefix; "" when no path configured
        self.path = self._generate_path(config.path)
        # fail fast before any upload work is attempted
        self._validate_bucket()

    def _validate_bucket(self):
        # raise early so a bad config does not surface only after export
        if not self._client.bucket_exists(self.bucket):
            raise ValueError(f"Given bucket does not exist: {self.bucket}")

    def _generate_path(self, path_name: Union[str, None]) -> str:
        # strip a single trailing slash; empty string means "no prefix"
        if path_name:
            if path_name[-1] == '/':
                return path_name[:-1]
            return path_name
        return ""

    def upload_backup(self, local_file_path: str):
        """upload archive file to minio bucket"""
        # this will be the name of the object to upload
        # only get the file name not path
        # we are going to use path provided by user for object storage
        file_name = local_file_path.split("/")[-1]
        if self.path:
            object_path = f"{self.path}/{file_name}"
        else:
            object_path = file_name
        result = self._client.fput_object(self.bucket, object_path, local_file_path)
        log.info("""Created object: %s with tag: %s and version-id: %s""",
            result.object_name, result.etag, result.version_id)
|
|
@@ -0,0 +1,43 @@
|
|
|
1
|
+
from typing import Dict, Union
|
|
2
|
+
import json
|
|
3
|
+
import os
|
|
4
|
+
import logging
|
|
5
|
+
import tarfile
|
|
6
|
+
import shutil
|
|
7
|
+
from io import BytesIO
|
|
8
|
+
import gzip
|
|
9
|
+
|
|
10
|
+
from bookstack_file_exporter.common import util
|
|
11
|
+
|
|
12
|
+
log = logging.getLogger(__name__)
|
|
13
|
+
|
|
14
|
+
def get_byte_response(url: str, headers: Dict[str, str]) -> bytes:
    """Fetch *url* through the shared http helper and return the raw body bytes."""
    resp = util.http_get_request(url=url, headers=headers)
    return resp.content
|
|
18
|
+
|
|
19
|
+
def write_bytes(base_tar_dir: str, file_path: str, data: bytes):
    """Append *data* as a member named *file_path* to the tar at *base_tar_dir*."""
    # build the in-memory member first, then open the archive in append mode
    buffer = BytesIO(data)
    member = tarfile.TarInfo(name=file_path)
    member.size = buffer.getbuffer().nbytes
    log.debug("Adding file: %s with size: %d bytes to tar file", member.name, member.size)
    with tarfile.open(base_tar_dir, "a") as archive:
        archive.addfile(member, fileobj=buffer)
|
|
27
|
+
|
|
28
|
+
def get_json_bytes(data: Dict[str, Union[str, int]]) -> bytes:
    """Serialize *data* to pretty-printed (indent=4) JSON, UTF-8 encoded."""
    serialized = json.dumps(data, indent=4)
    return serialized.encode('utf-8')
|
|
31
|
+
|
|
32
|
+
# set as function in case we want to do checks or final actions later
def remove_file(file_path: str):
    """Delete the file at *file_path* from disk."""
    os.remove(file_path)
|
|
36
|
+
|
|
37
|
+
def create_gzip(tar_file: str, gzip_file: str, remove_old: bool = True):
    """Gzip-compress *tar_file* into *gzip_file*, deleting the source by default."""
    # single combined context: stream-copy the tar into the gzip writer
    with open(tar_file, 'rb') as source, gzip.open(gzip_file, 'wb') as target:
        shutil.copyfileobj(source, target)
    if remove_old:
        remove_file(tar_file)
|
|
File without changes
|
|
@@ -0,0 +1,32 @@
|
|
|
1
|
+
import logging
|
|
2
|
+
from typing import Tuple, Dict
|
|
3
|
+
import requests
|
|
4
|
+
from requests.adapters import HTTPAdapter, Retry
|
|
5
|
+
|
|
6
|
+
log = logging.getLogger(__name__)
|
|
7
|
+
|
|
8
|
+
def http_get_request(url: str, headers: Dict[str, str], timeout: int = 30) -> requests.Response:
    """GET *url* with retry/backoff and return the requests Response object."""
    verify, url_prefix = should_verify(url)
    # {backoff factor} * (2 ** ({number of previous retries}))
    # {raise_on_status} if status falls in status_forcelist range
    # and retries have been exhausted.
    # {status_force_list} 429 is supposed to be included
    retry_policy = Retry(total=3,
                         backoff_factor=0.5,
                         raise_on_status=True,
                         status_forcelist=[500, 502, 503, 504])
    try:
        with requests.Session() as session:
            session.mount(url_prefix, HTTPAdapter(max_retries=retry_policy))
            response = session.get(url, headers=headers, verify=verify, timeout=timeout)
    except Exception as req_err:
        log.error("Failed to make request for %s", url)
        raise req_err
    return response
|
|
27
|
+
|
|
28
|
+
def should_verify(url: str) -> Tuple[bool, str]:
    """Return (tls_verify, scheme_prefix); https verifies TLS, anything else maps to http."""
    is_https = url.startswith("https://")
    return (is_https, "https://" if is_https else "http://")
|
|
File without changes
|
|
@@ -0,0 +1,200 @@
|
|
|
1
|
+
import os
|
|
2
|
+
import argparse
|
|
3
|
+
from typing import Dict, Tuple
|
|
4
|
+
import logging
|
|
5
|
+
|
|
6
|
+
import yaml
|
|
7
|
+
|
|
8
|
+
from bookstack_file_exporter.config_helper import models
|
|
9
|
+
from bookstack_file_exporter.config_helper.remote import StorageProviderConfig
|
|
10
|
+
|
|
11
|
+
log = logging.getLogger(__name__)
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
_DEFAULT_HEADERS = {
    'Content-Type': 'application/json; charset=utf-8'
}

_API_PATHS = {
    "shelves": "api/shelves",
    "books": "api/books",
    "chapters": "api/chapters",
    "pages": "api/pages"
}

_UNASSIGNED_BOOKS_DIR = "unassigned/"

_BASE_DIR_NAME = "bookstack_export"

# environment variable names; values set in the environment take
# precedence over values from the configuration file
_BOOKSTACK_TOKEN_FIELD = 'BOOKSTACK_TOKEN_ID'
_BOOKSTACK_TOKEN_SECRET_FIELD = 'BOOKSTACK_TOKEN_SECRET'
_MINIO_ACCESS_KEY_FIELD = 'MINIO_ACCESS_KEY'
_MINIO_SECRET_KEY_FIELD = 'MINIO_SECRET_KEY'

## Normalize config from cli or from config file
class ConfigNode:
    """
    Get Run Configuration from YAML file and normalize the data in an accessible object

    Args:
        Arg parse from user input

    Returns:
        ConfigNode object with attributes that are
        accessible for use for further downstream processes

    Raises:
        YAMLError: if provided configuration file is not valid YAML

        ValueError: if improper arguments are given from user
    """
    def __init__(self, args: argparse.Namespace):
        self.unassigned_book_dir = _UNASSIGNED_BOOKS_DIR
        self.user_inputs = self._generate_config(args.config_file)
        self._base_dir_name = self._set_base_dir(args.output_dir)
        self._token_id, self._token_secret = self._generate_credentials()
        self._headers = self._generate_headers()
        self._urls = self._generate_urls()
        self._minio_access_key = ""
        self._minio_secret_key = ""
        self._object_storage_config = self._generate_remote_config()

    def _generate_config(self, config_file: str) -> models.UserInput:
        """Load the YAML configuration file and validate it against the schema."""
        if not os.path.isfile(config_file):
            raise FileNotFoundError(config_file)
        with open(config_file, "r", encoding="utf-8") as yaml_stream:
            try:
                yaml_input = yaml.safe_load(yaml_stream)
            except Exception as load_err:
                # log here to make it easier to identify the issue
                log.error("Failed to load yaml configuration file")
                raise load_err
        try:
            user_inputs = models.UserInput(**yaml_input)
        except Exception as err:
            # log here to make it easier to identify the issue
            log.error("Yaml configuration failed schema validation")
            raise err
        return user_inputs

    def _generate_credentials(self) -> Tuple[str, str]:
        """Resolve bookstack token id/secret from config file and env vars."""
        # if user provided credentials in config file, load them
        token_id = ""
        token_secret = ""
        if self.user_inputs.credentials:
            token_id = self.user_inputs.credentials.token_id
            token_secret = self.user_inputs.credentials.token_secret

        # check to see if env var is specified, if so, it takes precedence
        token_id = self._check_var(_BOOKSTACK_TOKEN_FIELD, token_id)
        token_secret = self._check_var(_BOOKSTACK_TOKEN_SECRET_FIELD, token_secret)
        return token_id, token_secret

    def _generate_remote_config(self) -> Dict[str, StorageProviderConfig]:
        """Build the remote-storage target map (currently minio only)."""
        object_config = {}
        # check for optional minio credentials if configuration is set in yaml configuration file
        if self.user_inputs.minio_config:
            minio_access_key = self._check_var(_MINIO_ACCESS_KEY_FIELD,
                self.user_inputs.minio_config.access_key)
            minio_secret_key = self._check_var(_MINIO_SECRET_KEY_FIELD,
                self.user_inputs.minio_config.secret_key)
            object_config["minio"] = StorageProviderConfig(minio_access_key,
                minio_secret_key, self.user_inputs.minio_config.bucket,
                host=self.user_inputs.minio_config.host,
                path=self.user_inputs.minio_config.path,
                region=self.user_inputs.minio_config.region)
        return object_config

    def _generate_headers(self) -> Dict[str, str]:
        """Merge user headers, defaults, and the Authorization token header."""
        headers = {}
        # add additional_headers provided by user
        if self.user_inputs.additional_headers:
            for key, value in self.user_inputs.additional_headers.items():
                headers[key] = value

        # add default headers
        for key, value in _DEFAULT_HEADERS.items():
            # do not override if user added one already with same key
            if key not in headers:
                headers[key] = value

        # do not override user provided one
        if 'Authorization' not in headers:
            headers['Authorization'] = f"Token {self._token_id}:{self._token_secret}"
        return headers

    def _generate_urls(self) -> Dict[str, str]:
        """Build the full api url per resource type.

        Bug fix: the original stripped a trailing slash into the local
        ``host`` but then interpolated ``self.user_inputs.host`` unchanged,
        so a host like "https://example.com/" produced double-slash urls
        ("https://example.com//api/books"). The normalized ``host`` local
        is now used consistently.
        """
        urls = {}
        # remove trailing slash
        host = self.user_inputs.host
        if host[-1] == '/':
            host = host[:-1]
        # check to see if http protocol is defined
        if "http" not in host:
            # use https by default
            url_prefix = "https://"
        else:
            url_prefix = ""
        for key, value in _API_PATHS.items():
            urls[key] = f"{url_prefix}{host}/{value}"
        return urls

    def _set_base_dir(self, cmd_output_dir: str) -> str:
        """Resolve the export base directory; CLI option wins over config file."""
        output_dir = self.user_inputs.output_path
        # override if command line specified
        if cmd_output_dir:
            log.debug("Output directory overwritten by command line option")
            output_dir = cmd_output_dir
        # check if user provided an output path
        if output_dir:
            # detect trailing slash
            # normalize to no trailing slash for later consistency
            if output_dir[-1] == '/':
                base_dir = f"{output_dir}{_BASE_DIR_NAME}"
            else:
                base_dir = f"{output_dir}/{_BASE_DIR_NAME}"
        else:
            base_dir = _BASE_DIR_NAME
        return base_dir

    @property
    def headers(self) -> Dict[str, str]:
        """get generated headers"""
        return self._headers

    @property
    def urls(self) -> Dict[str, str]:
        """get generated urls"""
        return self._urls

    @property
    def base_dir_name(self) -> str:
        """get base dir of output target"""
        return self._base_dir_name

    @property
    def object_storage_config(self) -> Dict[str, StorageProviderConfig]:
        """return remote storage configuration"""
        return self._object_storage_config

    @staticmethod
    def _check_var(env_key: str, default_val: str) -> str:
        """
        :param: env_key = the environment variable to check
        :param: default_val = the default value if any to set if env variable not set

        :return: env_key if present or default_val if not
        :throws: ValueError if both parameters are empty.
        """
        env_value = os.environ.get(env_key, "")
        # env value takes precedence
        if env_value:
            log.debug("""env key: %s specified.
            Will override configuration file value if set.""", env_key)
            return env_value
        # check for optional inputs, if env and input is missing
        if not env_value and not default_val:
            raise ValueError(f"""{env_key} is not specified in env and is
            missing from configuration - at least one should be set""")
        # fall back to configuration file value if present
        return default_val
|
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
from typing import Dict, Literal, List, Optional
|
|
2
|
+
from pydantic import BaseModel
|
|
3
|
+
|
|
4
|
+
# pylint: disable=R0903
|
|
5
|
+
|
|
6
|
+
class MinioConfig(BaseModel):
    """YAML schema for minio configuration"""
    # minio server host/endpoint
    host: str
    # may be omitted in yaml; the MINIO_ACCESS_KEY env var takes precedence
    access_key: Optional[str] = None
    # may be omitted in yaml; the MINIO_SECRET_KEY env var takes precedence
    secret_key: Optional[str] = None
    # target bucket for uploads
    bucket: str
    # optional object path prefix inside the bucket
    path: Optional[str] = None
    region: str
|
|
14
|
+
|
|
15
|
+
class BookstackAccess(BaseModel):
    """YAML schema for bookstack access credentials"""
    # api token id; the BOOKSTACK_TOKEN_ID env var takes precedence when set
    token_id: str
    # api token secret; the BOOKSTACK_TOKEN_SECRET env var takes precedence when set
    token_secret: str
|
|
19
|
+
|
|
20
|
+
class UserInput(BaseModel):
    """YAML schema for user provided configuration file"""
    # bookstack host; "https://" is prepended when no http scheme is present
    host: str
    # extra http headers merged into every request (user values win over defaults)
    additional_headers: Optional[Dict[str, str]] = None
    # optional credentials block; env vars take precedence when set
    credentials: Optional[BookstackAccess] = None
    # page export formats to produce
    formats: List[Literal["markdown", "html", "pdf", "plaintext"]]
    # local directory for the export; relative to the working dir when unset
    output_path: Optional[str] = None
    # when true, also export a _meta.json per page
    export_meta: Optional[bool] = None
    # optional minio remote-storage target
    minio_config: Optional[MinioConfig] = None
    # when true, delete the local archive after remote upload
    clean_up: Optional[bool] = None
|
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
from typing import Union
|
|
2
|
+
|
|
3
|
+
## convenience class
|
|
4
|
+
## able to work for minio, s3, etc.
|
|
5
|
+
class StorageProviderConfig:
    """
    Convenience class to get dot notation for remote object storage
    configuration access.

    Args:
        access_key <str> = required token id
        secret_key <str> = required secret token
        bucket <str> = bucket to upload
        host <str> (optional) = if provider requires a host/url
        path <str> (optional) = specify bucket path for upload
        region <str> (optional) = if provider requires region

    Returns:
        StorageProviderConfig instance for dot notation access
    """
    def __init__(self, access_key: str, secret_key: str, bucket: str,
            host: Union[str, None]=None, path: Union[str, None]=None,
            region: Union[str, None]=None):
        # required credential and destination fields
        self.access_key = access_key
        self.secret_key = secret_key
        self.bucket = bucket
        # provider-specific optional fields
        self.host = host
        self.path = path
        self.region = region
|
|
File without changes
|
|
@@ -0,0 +1,144 @@
|
|
|
1
|
+
from typing import Dict, List
|
|
2
|
+
import logging
|
|
3
|
+
|
|
4
|
+
from bookstack_file_exporter.exporter import util
|
|
5
|
+
from bookstack_file_exporter.exporter.node import Node
|
|
6
|
+
|
|
7
|
+
log = logging.getLogger(__name__)
|
|
8
|
+
|
|
9
|
+
class NodeExporter():
    """
    NodeExporter class provides an interface to help create
    Bookstack resources/nodes (pages, books, etc) and their relationships.

    Raises:

        ValueError if data returned from bookstack api is empty or not in desired format.
    """
    def __init__(self, api_urls: Dict[str, str], headers: Dict[str,str]):
        # resource type -> api base url (e.g. "books" -> ".../api/books")
        self.api_urls = api_urls
        # request headers including Authorization
        self.headers = headers

    def get_all_shelves(self) -> Dict[int, Node]:
        """
        Function to get all shelf Node instances
        :returns: Dict[int, Node] for all shelf nodes
        """
        base_url = self.api_urls["shelves"]
        all_parents: List[int] = util.get_all_ids(base_url, self.headers)
        if not all_parents:
            log.warning("No shelves found in given Bookstack instance")
            return {}
        return self._get_parents(base_url, all_parents)

    def _get_parents(self, base_url: str, parent_ids: List[int],
            path_prefix: str = "") -> Dict[int, Node]:
        # fetch full metadata for each parent id and wrap it in a Node;
        # path_prefix lets callers nest these under a directory (e.g. "unassigned/")
        parent_nodes = {}
        for parent_id in parent_ids:
            parent_url = f"{base_url}/{parent_id}"
            parent_data = util.get_json_response(url=parent_url, headers=self.headers)
            parent_nodes[parent_id] = Node(parent_data, path_prefix=path_prefix)
        return parent_nodes

    def get_chapter_nodes(self, book_nodes: Dict[int, Node]) -> Dict[int, Node]:
        """ get chapter nodes """
        # Chapters are treated a little differently
        # They are under books like pages but have their own children
        # i.e. not a terminal node
        base_url = self.api_urls["chapters"]
        all_chapters: List[int] = util.get_all_ids(base_url, self.headers)
        if not all_chapters:
            log.debug("No chapters found in given Bookstack instance")
            return {}
        return self._get_chapters(base_url, all_chapters, book_nodes)

    def _get_chapters(self, base_url: str, all_chapters: List[int],
            book_nodes: Dict[int, Node]) -> Dict[int, Node]:
        # each chapter is attached to its parent book via the api's book_id field
        chapter_nodes = {}
        for chapter_id in all_chapters:
            chapter_url = f"{base_url}/{chapter_id}"
            chapter_data = util.get_json_response(url=chapter_url, headers=self.headers)
            book_id = chapter_data['book_id']
            chapter_nodes[chapter_id] = Node(chapter_data, book_nodes[book_id])
        return chapter_nodes

    def get_child_nodes(self, resource_type: str, parent_nodes: Dict[int, Node],
            filter_empty: bool = True) -> Dict[int, Node]:
        """get child nodes from a book/chapter/shelf"""
        base_url = self.api_urls[resource_type]
        return self._get_children(base_url, parent_nodes, filter_empty)

    def _get_children(self, base_url: str, parent_nodes: Dict[int, Node],
            filter_empty: bool) -> Dict[int, Node]:
        # walk every parent's child listing, fetch full child data,
        # and optionally drop children that report themselves empty
        child_nodes = {}
        for _, parent in parent_nodes.items():
            if parent.children:
                for child in parent.children:
                    child_id = child['id']
                    child_url = f"{base_url}/{child_id}"
                    child_data = util.get_json_response(url=child_url, headers=self.headers)
                    child_node = Node(child_data, parent)
                    if filter_empty:
                        if not child_node.empty:
                            child_nodes[child_id] = child_node
                    else:
                        child_nodes[child_id] = child_node
        return child_nodes

    def get_unassigned_books(self, existing_resources: Dict[int, Node],
            path_prefix: str) -> Dict[int, Node]:
        """get books not under a shelf"""
        base_url = self.api_urls["books"]
        all_resources: List[int] = util.get_all_ids(url=base_url, headers=self.headers)
        unassigned = []
        # get all existing ones and compare against current known resources
        for resource_id in all_resources:
            if resource_id not in existing_resources:
                unassigned.append(resource_id)
        if not unassigned:
            return {}
        # books with no shelf treated like a parent resource
        return self._get_parents(base_url, unassigned, path_prefix)

    # convenience function
    def get_all_books(self, shelve_nodes: Dict[int, Node], unassigned_dir: str) -> Dict[int, Node]:
        """get all books"""
        book_nodes = {}
        # get books in shelves
        if shelve_nodes:
            book_nodes = self.get_child_nodes("books", shelve_nodes)
        # books with no shelve assignment
        # default will be put in "unassigned" directory relative to backup dir
        books_no_shelf = self.get_unassigned_books(book_nodes, unassigned_dir)

        # add new book nodes to map
        # these should not already be present in map
        # since we started with shelves first and then moved our way down.
        if books_no_shelf:
            for key, value in books_no_shelf.items():
                book_nodes[key] = value

        return book_nodes

    # convenience function
    def get_all_pages(self, book_nodes: Dict[int, Node]) -> Dict[int, Node]:
        """get all pages and their content"""
        ## pages
        page_nodes = {}
        if book_nodes:
            page_nodes: Dict[int, Node] = self.get_child_nodes("pages", book_nodes)
        ## chapters (if exists)
        # chapter nodes are treated a little differently
        # chapters are children under books
        chapter_nodes: Dict[int, Node] = self.get_chapter_nodes(book_nodes)
        # add chapter node pages
        # replace existing page node if found with proper chapter parent
        if chapter_nodes:
            page_chapter_nodes: Dict[int, Node] = self.get_child_nodes("pages", chapter_nodes)
            ## since we filter empty, check if there is any content
            ## add all chapter pages to existing page nodes
            if page_chapter_nodes:
                for key, value in page_chapter_nodes.items():
                    page_nodes[key] = value
        return page_nodes
|
|
144
|
+
|
|
@@ -0,0 +1,79 @@
|
|
|
1
|
+
from typing import Dict, Union, List
|
|
2
|
+
|
|
3
|
+
# key under which each resource type stores its children:
#   shelves  -> 'books'
#   books    -> 'contents'
#   chapters -> 'pages'
_CHILD_KEYS = ['books', 'contents', 'pages']

# bookstack assigns this display name (with an empty slug) to
# pages that were created but never edited
_NULL_PAGE_NAME = "New Page"

class Node():
    """
    Represent one bookstack resource (shelf, book, chapter, or page) and
    its parent/child relationships for export purposes.

    Args:
        meta: Dict[str, Union[str, int]] (required)
            resource metadata as returned by the bookstack api
        parent: Union['Node', None] (optional)
            parent resource if any; parents/children are also 'Node' objects
        path_prefix: str (optional)
            relative 'root' directory prepended to this node's path/file name;
            mainly used to place books without a shelf under a dedicated
            directory (e.g. "unassigned")

    Returns:
        Node instance exposing the resource's slug/id and computed
        file-path helpers for the exporter.
    """
    def __init__(self, meta: Dict[str, Union[str, int]],
                 parent: Union['Node', None] = None, path_prefix: str = ""):
        self.meta = meta
        self._parent = parent
        self._path_prefix = path_prefix
        # convenience fields pulled straight from api metadata
        self.name: str = self.meta['slug']
        self.id_: int = self.meta['id']
        self._display_name = self.meta['name']
        # child metadata, if this resource type has any
        self._children = self._get_children()
        # parent-relative path; empty string when there is no parent
        self._file_path = self._get_file_path()

    def _get_file_path(self) -> str:
        """Build '<parent_path>/<slug>'; empty when there is no parent."""
        if self._parent is None:
            return ""
        return f"{self._parent.file_path}/{self.name}"

    def _get_children(self) -> List[Dict[str, Union[str, int]]]:
        """Return the metadata list under the first matching child key."""
        return next(
            (self.meta[key] for key in _CHILD_KEYS if key in self.meta),
            [],
        )

    @property
    def file_path(self):
        """Full relative path for this node, including any path prefix."""
        # with no parent the path is just the prefix plus our own slug
        base = self._file_path if self._file_path else self.name
        return f"{self._path_prefix}{base}"

    @property
    def children(self):
        """Child metadata of a book/chapter/shelf (empty list if none)."""
        return self._children

    @property
    def empty(self):
        """True when a page node lacks content (empty slug, default name)."""
        return not self.name and self._display_name == _NULL_PAGE_NAME
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
from typing import Dict, Union, List
|
|
2
|
+
import logging
|
|
3
|
+
from bookstack_file_exporter.common import util
|
|
4
|
+
|
|
5
|
+
log = logging.getLogger(__name__)
|
|
6
|
+
|
|
7
|
+
def get_json_response(url: str, headers: Dict[str, str]) -> List[Dict[str, Union[str,int]]]:
    """Fetch *url* with *headers* and decode the response body as json."""
    # delegate the http call (and its error handling) to the common helper
    return util.http_get_request(url=url, headers=headers).json()
|
11
|
+
|
|
12
|
+
def get_all_ids(url: str, headers: Dict[str, str]) -> List[int]:
    """Return the id of every bookstack resource listed at *url*."""
    listing = get_json_response(url=url, headers=headers)
    if not listing:
        # no response body -> nothing to export
        return []
    return [entry['id'] for entry in listing['data']]
|
@@ -0,0 +1,53 @@
|
|
|
1
|
+
import argparse
|
|
2
|
+
import sys
|
|
3
|
+
import logging
|
|
4
|
+
from typing import Dict
|
|
5
|
+
|
|
6
|
+
from bookstack_file_exporter.config_helper.config_helper import ConfigNode
|
|
7
|
+
from bookstack_file_exporter.exporter.node import Node
|
|
8
|
+
from bookstack_file_exporter.exporter.exporter import NodeExporter
|
|
9
|
+
from bookstack_file_exporter.archiver.archiver import Archiver
|
|
10
|
+
|
|
11
|
+
log = logging.getLogger(__name__)
|
|
12
|
+
|
|
13
|
+
def exporter(args: argparse.Namespace):
    """export bookstack nodes and archive locally and/or remotely"""
    # parse and validate user configuration
    config = ConfigNode(args)

    # shorthand references used throughout the run
    headers = config.headers
    urls = config.urls
    formats = config.user_inputs.formats
    unassigned_dir = config.unassigned_book_dir
    page_url = config.urls['pages']
    export_dir = config.base_dir_name

    #### Export Data #####
    # need to implement pagination for apis
    log.info("Beginning export")

    ## Walk the resource hierarchy: shelves -> books -> pages
    node_exporter = NodeExporter(urls, headers)
    shelves: Dict[int, Node] = node_exporter.get_all_shelves()
    books: Dict[int, Node] = node_exporter.get_all_books(shelves, unassigned_dir)
    pages: Dict[int, Node] = node_exporter.get_all_pages(books)
    if not pages:
        log.warning("No page data available from given Bookstack instance. Nothing to archive")
        sys.exit(0)

    log.info("Beginning archive")
    ## start archive ##
    archiver: Archiver = Archiver(export_dir, config.user_inputs.export_meta,
                                  page_url, headers)
    # dump page content locally and build the tarball
    archiver.archive(pages, formats)
    # upload to any configured object storage targets
    archiver.archive_remote(config.object_storage_config)
    # optionally remove the local .tgz once it has been uploaded
    archiver.clean_up(config.user_inputs.clean_up)

    log.info("Completed run")
|
|
@@ -0,0 +1,36 @@
|
|
|
1
|
+
import argparse
|
|
2
|
+
import logging
|
|
3
|
+
|
|
4
|
+
# mapping of cli log-level string -> logging module numeric level
LOG_LEVEL = {
    'debug': logging.DEBUG,
    'info': logging.INFO,
    'warning': logging.WARNING,
    'error': logging.ERROR
}

def get_log_level(log_level: str) -> int:
    """Return the numeric logging level for *log_level*.

    Lookup is case-insensitive. Falls back to ``logging.INFO`` for an
    unrecognized value: the original ``dict.get`` returned ``None`` in
    that case, which violates the declared ``int`` return type and would
    misconfigure ``logging.basicConfig``.
    """
    return LOG_LEVEL.get(log_level.lower(), logging.INFO)
|
|
14
|
+
|
|
15
|
+
def get_args() -> argparse.Namespace:
    """return user cmd line options"""
    parser = argparse.ArgumentParser(description='BookStack File Exporter')
    # path to the yaml configuration file
    parser.add_argument('-c', '--config-file', type=str,
                        default="data/config.yml",
                        help='''Provide a configuration file (full or relative path).
                        See README for more details''')
    # optional override for the export destination directory
    parser.add_argument('-o', '--output-dir', type=str, default="",
                        help='''Optional, specify an output directory.
                        This can also be specified in the config.yml file''')
    # verbosity; lower-cased so choices match LOG_LEVEL keys
    parser.add_argument('-v', '--log-level', type=str.lower, default='info',
                        choices=LOG_LEVEL.keys(),
                        help='Set verbosity level for logging.')
    return parser.parse_args()
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2023 homeylab
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
|
@@ -0,0 +1,251 @@
|
|
|
1
|
+
Metadata-Version: 2.1
|
|
2
|
+
Name: bookstack-file-exporter
|
|
3
|
+
Version: 0.0.1
|
|
4
|
+
Summary: An exporter written in python to export all documents from a bookstack instance in different formats
|
|
5
|
+
Home-page: https://github.com/homeylab/bookstack-file-exporter
|
|
6
|
+
Author: pchang388
|
|
7
|
+
License: MIT License
|
|
8
|
+
Keywords: bookstack,exporter
|
|
9
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
10
|
+
Classifier: Programming Language :: Python :: 3
|
|
11
|
+
Description-Content-Type: text/markdown
|
|
12
|
+
License-File: LICENSE
|
|
13
|
+
Requires-Dist: Pyyaml >=6.0.1
|
|
14
|
+
Requires-Dist: Pydantic >=2.3.0
|
|
15
|
+
Requires-Dist: requests >=2.31.0
|
|
16
|
+
Requires-Dist: minio >=7.1.16
|
|
17
|
+
|
|
18
|
+
# bookstack-file-exporter
|
|
19
|
+
|
|
20
|
+
_This project is still under active development. Functionality is there and is relatively stable at this time._
|
|
21
|
+
|
|
22
|
+
This tool provides a way to export Bookstack pages in a folder-tree layout locally with an option to push to remote object storage locations.
|
|
23
|
+
|
|
24
|
+
This small project was mainly created to run as a cron job in k8s but works anywhere. This would allow me to export my docs in markdown, or other formats like pdf. I use Bookstack's markdown editor as default instead of WYSIWYG editor and this makes my notes portable anywhere even if offline.
|
|
25
|
+
|
|
26
|
+
The main use case is to backup all docs in a folder-tree format to cover the scenarios:
|
|
27
|
+
|
|
28
|
+
1. Offline copy wanted.
|
|
29
|
+
2. Back up at a file level as an accessory or alternative to disk and volume backups.
|
|
30
|
+
3. Share docs with another person to keep locally.
|
|
31
|
+
4. Migrate to Markdown documenting for simplicity.
|
|
32
|
+
5. Provide an easy way to do automated file backups locally, in docker, or kubernetes.
|
|
33
|
+
|
|
34
|
+
Supported backup targets are
|
|
35
|
+
|
|
36
|
+
1. local
|
|
37
|
+
2. minio
|
|
38
|
+
3. s3 (Not Yet Implemented)
|
|
39
|
+
|
|
40
|
+
Backups are exported in `.tgz` format and generated based off timestamp. Export names will be in the format: `%Y-%m-%d_%H-%M-%S` (Year-Month-Day_Hour-Minute-Second). *Files are first pulled locally to create the tarball and then can be sent to object storage if needed*. Example file name: `bookstack_export_2023-09-22_07-19-54.tgz`.
|
|
41
|
+
|
|
42
|
+
This script can be run directly via cli as a pip module.
|
|
43
|
+
```
|
|
44
|
+
# if you already have python bin directory in your path
|
|
45
|
+
bookstack-file-exporter -c <path_to_config_file>
|
|
46
|
+
|
|
47
|
+
# using pip
|
|
48
|
+
python -m bookstack_file_exporter -c <path_to_config_file>
|
|
49
|
+
```
|
|
50
|
+
|
|
51
|
+
## Using This Application
|
|
52
|
+
|
|
53
|
+
### Run via Pip
|
|
54
|
+
Note: This application is tested and developed on Python `3.11.X`. It will probably work for >= `3.8` but is recommended to install (or set up a venv) a `3.11.X` version.
|
|
55
|
+
|
|
56
|
+
```bash
|
|
57
|
+
python -m pip install bookstack-file-exporter
|
|
58
|
+
|
|
59
|
+
# if you already have python bin directory in your path
|
|
60
|
+
bookstack-file-exporter -c <path_to_config_file>
|
|
61
|
+
|
|
62
|
+
# using pip
|
|
63
|
+
python -m bookstack_file_exporter -c <path_to_config_file>
|
|
64
|
+
```
|
|
65
|
+
Command line options:
|
|
66
|
+
| option | required | description |
|
|
67
|
+
| ------ | -------- | ----------- |
|
|
68
|
+
|`-c`, `--config-file`|True|Relative or Absolute path to a valid configuration file. This configuration file is checked against a schema for validation.|
|
|
69
|
+
|`-v`, `--log-level` |False, default: info|Provide a valid log level: info, debug, warning, error.|
|
|
70
|
+
|
|
71
|
+
### Run Via Docker
|
|
72
|
+
Example
|
|
73
|
+
```bash
|
|
74
|
+
docker run \
|
|
75
|
+
--user ${USER_ID}:${USER_GID} \
|
|
76
|
+
-v $(pwd)/local/config.yml:/export/config/config.yml:ro \
|
|
77
|
+
-v $(pwd)/bkps:/export/dump \
|
|
78
|
+
bookstack-file-exporter:0.0.1
|
|
79
|
+
```
|
|
80
|
+
Required Options:
|
|
81
|
+
| option | description |
| ------ | ----------- |
| `config.yml` file mount | Provide a valid configuration file. Specified in example as read only: `-v ${CURDIR}/local/config.yml:/export/config/config.yml:ro`, `${USER_LOCAL_PATH}:${STATIC_DOCKER_PATH}` |
|
|
83
|
+
| `dump` file mount | Directory to place exports. Specified in example: `-v ${CURDIR}/bkps:/export/dump`, `${USER_LOCAL_PATH}:${STATIC_DOCKER_PATH}` |
|
|
84
|
+
|
|
85
|
+
Tokens and other options can be specified, example:
|
|
86
|
+
```bash
|
|
87
|
+
# '-e' flag for env vars
|
|
88
|
+
# --user flag to override the uid/gid for created files
|
|
89
|
+
docker run \
|
|
90
|
+
-e LOG_LEVEL='debug' \
|
|
91
|
+
-e BOOKSTACK_TOKEN_ID='xyz' \
|
|
92
|
+
-e BOOKSTACK_TOKEN_SECRET='xyz' \
|
|
93
|
+
--user 1000:1000 \
|
|
94
|
+
-v $(pwd)/local/config.yml:/export/config/config.yml:ro \
|
|
95
|
+
-v $(pwd):/export/dump \
|
|
96
|
+
bookstack-file-exporter:0.0.1
|
|
97
|
+
```
|
|
98
|
+
|
|
99
|
+
### Authentication
|
|
100
|
+
**Note visibility of pages is based on user**, so use a user that has access to pages you want to back up
|
|
101
|
+
|
|
102
|
+
Ref: [https://demo.bookstackapp.com/api/docs#authentication](https://demo.bookstackapp.com/api/docs#authentication)
|
|
103
|
+
|
|
104
|
+
Provide a tokenId and a tokenSecret as environment variables or directly in the configuration file.
|
|
105
|
+
- `BOOKSTACK_TOKEN_ID`
|
|
106
|
+
- `BOOKSTACK_TOKEN_SECRET`
|
|
107
|
+
|
|
108
|
+
For object storage authentication, find the relevant sections further down in this document.
|
|
109
|
+
|
|
110
|
+
### Configuration file
|
|
111
|
+
See below for an example and explanation. Optionally, look at `examples/` folder for more.
|
|
112
|
+
|
|
113
|
+
Schema and values are checked so ensure proper settings are provided.
|
|
114
|
+
```
|
|
115
|
+
# if http/https not specified, defaults to https
|
|
116
|
+
# if you put http here, it will try verify=false, to not check certs
|
|
117
|
+
host: "https://bookstack.yourdomain.com"
|
|
118
|
+
|
|
119
|
+
# You could optionally set the bookstack token_id and token_secret here instead of env
|
|
120
|
+
# If env variable is also supplied, env variable will take precedence
|
|
121
|
+
credentials:
|
|
122
|
+
token_id: ""
|
|
123
|
+
token_secret: ""
|
|
124
|
+
|
|
125
|
+
# additional headers to add, examples below
|
|
126
|
+
additional_headers:
|
|
127
|
+
test: "test"
|
|
128
|
+
test2: "test2"
|
|
129
|
+
User-Agent: "test-agent"
|
|
130
|
+
|
|
131
|
+
# supported formats from bookstack below
|
|
132
|
+
# valid formats: markdown, html, pdf, plaintext
|
|
133
|
+
# you can specify one or as many as you'd like
|
|
134
|
+
formats:
|
|
135
|
+
- markdown
|
|
136
|
+
- html
|
|
137
|
+
- pdf
|
|
138
|
+
- plaintext
|
|
139
|
+
|
|
140
|
+
# optional minio configuration
|
|
141
|
+
# If not required, you should omit/comment out the section
|
|
142
|
+
# You can specify env vars instead for access and secret key
|
|
143
|
+
# See Minio Backups section of this doc for more info on required fields
|
|
144
|
+
minio_config:
|
|
145
|
+
host: "minio.yourdomain.com"
|
|
146
|
+
access_key: ""
|
|
147
|
+
secret_key: ""
|
|
148
|
+
region: "us-east-1"
|
|
149
|
+
bucket: "mybucket"
|
|
150
|
+
path: "bookstack/backups"
|
|
151
|
+
|
|
152
|
+
# output directory for the exported archive
|
|
153
|
+
# relative or full path
|
|
154
|
+
# User who runs the command should have access to write and create sub folders in this directory
|
|
155
|
+
# optional, if not provided, will use current run directory by default
|
|
156
|
+
output_path: "bkps/"
|
|
157
|
+
|
|
158
|
+
# optional export of metadata about the page in a json file
|
|
159
|
+
# this metadata contains general information about the page
|
|
160
|
+
# like: last update, owner, revision count, etc.
|
|
161
|
+
# omit this or set to false if not needed
|
|
162
|
+
export_meta: true
|
|
163
|
+
|
|
164
|
+
# optional if using object storage targets
|
|
165
|
+
# After uploading to object storage targets, choose to clean up local files
|
|
166
|
+
# delete the archive from local filesystem
|
|
167
|
+
# will not be cleaned up if set to false or omitted
|
|
168
|
+
clean_up: true
|
|
169
|
+
```
|
|
170
|
+
|
|
171
|
+
### Backup Behavior
|
|
172
|
+
We will use slug names (from Bookstack API) by default, as such certain characters like `!`, `/` will be ignored and spaces replaced.
|
|
173
|
+
|
|
174
|
+
All sub directories will be created as required during the export process.
|
|
175
|
+
|
|
176
|
+
```
|
|
177
|
+
Shelves --> Books --> Chapters --> Pages
|
|
178
|
+
|
|
179
|
+
## Example
|
|
180
|
+
kafka
|
|
181
|
+
---> controller
|
|
182
|
+
---> settings
|
|
183
|
+
---> logs (chapter)
|
|
184
|
+
---> retention.md
|
|
185
|
+
---> compression.pdf
|
|
186
|
+
---> something.html
|
|
187
|
+
---> other.txt
|
|
188
|
+
---> optional
|
|
189
|
+
---> main
|
|
190
|
+
---> deploy
|
|
191
|
+
---> broker
|
|
192
|
+
---> settings
|
|
193
|
+
---> deploy
|
|
194
|
+
---> schema-registry
|
|
195
|
+
---> protobuf
|
|
196
|
+
---> settings
|
|
197
|
+
```
|
|
198
|
+
|
|
199
|
+
Books without a shelf will be put in a shelve folder named `unassigned`.
|
|
200
|
+
|
|
201
|
+
Empty/New Pages will be ignored since they have not been modified yet from creation and are empty but also do not have a valid slug. Example:
|
|
202
|
+
```
|
|
203
|
+
{
|
|
204
|
+
...
|
|
205
|
+
"name": "New Page",
|
|
206
|
+
"slug": "",
|
|
207
|
+
...
|
|
208
|
+
}
|
|
209
|
+
```
|
|
210
|
+
|
|
211
|
+
You may notice some directories (books) and/or files (pages) in the archive have a random string at the end, example - `nKA`: `user-and-group-management-nKA`. This is expected and is because there were resources with the same name created in another shelve and bookstack adds a string at the end to ensure uniqueness.
|
|
212
|
+
|
|
213
|
+
### Minio Backups
|
|
214
|
+
When specifying `minio_config` in the configuration file, these fields are required in the file:
|
|
215
|
+
```
|
|
216
|
+
# a host/ip + port combination is also allowed
|
|
217
|
+
# example: "minio.yourdomain.com:8443"
|
|
218
|
+
host: "minio.yourdomain.com"
|
|
219
|
+
|
|
220
|
+
# this is required since minio api appears to require it
|
|
221
|
+
# set to the region your bucket resides in
|
|
222
|
+
# if unsure, try "us-east-1" first
|
|
223
|
+
region: "us-east-1"
|
|
224
|
+
|
|
225
|
+
# bucket to upload to
|
|
226
|
+
bucket: "mybucket"
|
|
227
|
+
```
|
|
228
|
+
|
|
229
|
+
These fields are optional:
|
|
230
|
+
```
|
|
231
|
+
# access key for the minio instance
|
|
232
|
+
# optionally set as env variable instead
|
|
233
|
+
access_key: ""
|
|
234
|
+
|
|
235
|
+
# secret key for the minio instance
|
|
236
|
+
# optionally set as env variable instead
|
|
237
|
+
secret_key: ""
|
|
238
|
+
|
|
239
|
+
# the path of the backup
|
|
240
|
+
# in example below, the exported archive will appear in: `<bucket_name>:/bookstack/backups/bookstack-<timestamp>.tgz`
|
|
241
|
+
path: "bookstack/backups"
|
|
242
|
+
```
|
|
243
|
+
|
|
244
|
+
As mentioned you can optionally set access and secret key as env variables. If both are specified, env variable will take precedence.
|
|
245
|
+
- `MINIO_ACCESS_KEY`
|
|
246
|
+
- `MINIO_SECRET_KEY`
|
|
247
|
+
|
|
248
|
+
## Future Items
|
|
249
|
+
1. Be able to pull media/photos locally and place in their respective page folders for a more complete file level backup.
|
|
250
|
+
2. Include the exporter in a maintained helm chart as an optional deployment. The helm chart is [here](https://github.com/homeylab/helm-charts/tree/main/charts/bookstack).
|
|
251
|
+
3. Export S3 or more options.
|
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
bookstack_file_exporter/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
2
|
+
bookstack_file_exporter/__main__.py,sha256=2jPiBcxzY7pPM26wKrVAKgNmwjfnG3KjrdruwbTb1AE,442
|
|
3
|
+
bookstack_file_exporter/run.py,sha256=-8eNYa5YJSTYVR1lbplMKUBudFoLYsN9WS_PjD7fSA8,2045
|
|
4
|
+
bookstack_file_exporter/run_args.py,sha256=a79JqV7pmA47vTdE-2IhVX-xeeYiJKRH4RClrDMOfPg,1248
|
|
5
|
+
bookstack_file_exporter/archiver/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
6
|
+
bookstack_file_exporter/archiver/archiver.py,sha256=VYRRjXOs7D2LxaMhhG_wsmLzAJEfK7uVifCpdkyvVxQ,5226
|
|
7
|
+
bookstack_file_exporter/archiver/minio_archiver.py,sha256=jpddpNE6WMKaYiJs-jcEZF4o0MUcTVLrTkjFJKyH0eA,1923
|
|
8
|
+
bookstack_file_exporter/archiver/util.py,sha256=sb9h0GrGzUahkEniNQ1C4C-ofFzwMvXIrdosm-spVKk,1480
|
|
9
|
+
bookstack_file_exporter/common/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
10
|
+
bookstack_file_exporter/common/util.py,sha256=2S_BIx05eu_mjKSrbcvNVYEVCS0yheIEcatU0n7Njkg,1333
|
|
11
|
+
bookstack_file_exporter/config_helper/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
12
|
+
bookstack_file_exporter/config_helper/config_helper.py,sha256=INSCPT0GyYcS87VnSLMqBgYzB3OEX7moyTCgNM5Us1g,7688
|
|
13
|
+
bookstack_file_exporter/config_helper/models.py,sha256=StoywdzjCNJjqaI7CQm9d1GBAdEwUCx7qfWfASo_ezg,898
|
|
14
|
+
bookstack_file_exporter/config_helper/remote.py,sha256=ssGekxW9tN91BFuGuKLIgeHKHRbMNtZjiM4LEuesxcE,1013
|
|
15
|
+
bookstack_file_exporter/exporter/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
16
|
+
bookstack_file_exporter/exporter/exporter.py,sha256=5KA4h1smdD4EhVk6-cfnDNUWp25Ab0x28sqX8jlL1HU,6324
|
|
17
|
+
bookstack_file_exporter/exporter/node.py,sha256=-7YOUtVdnLPO9-uy3rRWiV1J9Ivnqfxe1FflYW-ZE_k,2708
|
|
18
|
+
bookstack_file_exporter/exporter/util.py,sha256=_En8NGDcUldfuaPW3wrf8RSX6dj_1kNTk4LCIDG1F-8,640
|
|
19
|
+
bookstack_file_exporter-0.0.1.dist-info/LICENSE,sha256=ToZ-JOFE6-SiD4z5P0cA21eBwTvEWxlBcKAGWMXbM5o,1065
|
|
20
|
+
bookstack_file_exporter-0.0.1.dist-info/METADATA,sha256=ZCawbU4fraw6Ni3e-i1pkLenWIzj1D-91HGd0hR39JU,9069
|
|
21
|
+
bookstack_file_exporter-0.0.1.dist-info/WHEEL,sha256=yQN5g4mg4AybRjkgi-9yy4iQEFibGQmlz78Pik5Or-A,92
|
|
22
|
+
bookstack_file_exporter-0.0.1.dist-info/entry_points.txt,sha256=0-7syMTwEqR4OFDVTbNLVa0OWRJk801CWVEqcZasPWo,82
|
|
23
|
+
bookstack_file_exporter-0.0.1.dist-info/top_level.txt,sha256=o_iIJ9azW-HvDMZdAEKgpTAc3SVYA3z9npCxUYo5Xtw,24
|
|
24
|
+
bookstack_file_exporter-0.0.1.dist-info/RECORD,,
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
bookstack_file_exporter
|