bookstack-file-exporter 2.0.0__tar.gz → 2.2.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {bookstack_file_exporter-2.0.0/bookstack_file_exporter.egg-info → bookstack_file_exporter-2.2.0}/PKG-INFO +65 -29
- bookstack_file_exporter-2.0.0/PKG-INFO → bookstack_file_exporter-2.2.0/README.md +57 -43
- {bookstack_file_exporter-2.0.0 → bookstack_file_exporter-2.2.0}/bookstack_file_exporter/archiver/archiver.py +69 -21
- {bookstack_file_exporter-2.0.0 → bookstack_file_exporter-2.2.0}/bookstack_file_exporter/archiver/asset_archiver.py +55 -41
- {bookstack_file_exporter-2.0.0 → bookstack_file_exporter-2.2.0}/bookstack_file_exporter/archiver/minio_archiver.py +7 -12
- bookstack_file_exporter-2.2.0/bookstack_file_exporter/archiver/node_archiver.py +406 -0
- {bookstack_file_exporter-2.0.0 → bookstack_file_exporter-2.2.0}/bookstack_file_exporter/archiver/util.py +1 -2
- {bookstack_file_exporter-2.0.0 → bookstack_file_exporter-2.2.0}/bookstack_file_exporter/common/util.py +45 -42
- {bookstack_file_exporter-2.0.0 → bookstack_file_exporter-2.2.0}/bookstack_file_exporter/config_helper/config_helper.py +15 -50
- bookstack_file_exporter-2.2.0/bookstack_file_exporter/config_helper/models.py +79 -0
- {bookstack_file_exporter-2.0.0 → bookstack_file_exporter-2.2.0}/bookstack_file_exporter/config_helper/notifications.py +2 -6
- {bookstack_file_exporter-2.0.0 → bookstack_file_exporter-2.2.0}/bookstack_file_exporter/config_helper/remote.py +1 -1
- {bookstack_file_exporter-2.0.0 → bookstack_file_exporter-2.2.0}/bookstack_file_exporter/exporter/exporter.py +22 -25
- {bookstack_file_exporter-2.0.0 → bookstack_file_exporter-2.2.0}/bookstack_file_exporter/exporter/node.py +3 -7
- {bookstack_file_exporter-2.0.0 → bookstack_file_exporter-2.2.0}/bookstack_file_exporter/notify/handler.py +1 -2
- {bookstack_file_exporter-2.0.0 → bookstack_file_exporter-2.2.0}/bookstack_file_exporter/notify/notifiers.py +2 -3
- {bookstack_file_exporter-2.0.0 → bookstack_file_exporter-2.2.0}/bookstack_file_exporter/run.py +37 -21
- bookstack_file_exporter-2.0.0/README.md → bookstack_file_exporter-2.2.0/bookstack_file_exporter.egg-info/PKG-INFO +80 -17
- {bookstack_file_exporter-2.0.0 → bookstack_file_exporter-2.2.0}/bookstack_file_exporter.egg-info/SOURCES.txt +2 -23
- bookstack_file_exporter-2.2.0/bookstack_file_exporter.egg-info/requires.txt +7 -0
- {bookstack_file_exporter-2.0.0 → bookstack_file_exporter-2.2.0}/bookstack_file_exporter.egg-info/top_level.txt +0 -1
- bookstack_file_exporter-2.2.0/pyproject.toml +63 -0
- bookstack_file_exporter-2.2.0/setup.cfg +4 -0
- bookstack_file_exporter-2.0.0/bookstack_file_exporter/archiver/page_archiver.py +0 -221
- bookstack_file_exporter-2.0.0/bookstack_file_exporter/config_helper/models.py +0 -73
- bookstack_file_exporter-2.0.0/bookstack_file_exporter.egg-info/requires.txt +0 -13
- bookstack_file_exporter-2.0.0/pyproject.toml +0 -23
- bookstack_file_exporter-2.0.0/setup.cfg +0 -41
- bookstack_file_exporter-2.0.0/tests/__init__.py +0 -2
- bookstack_file_exporter-2.0.0/tests/conftest.py +0 -165
- bookstack_file_exporter-2.0.0/tests/fixtures/__init__.py +0 -2
- bookstack_file_exporter-2.0.0/tests/fixtures/mock_config.py +0 -19
- bookstack_file_exporter-2.0.0/tests/helpers.py +0 -10
- bookstack_file_exporter-2.0.0/tests/integration/__init__.py +0 -2
- bookstack_file_exporter-2.0.0/tests/integration/test_empty_bookstack.py +0 -30
- bookstack_file_exporter-2.0.0/tests/integration/test_full_traversal.py +0 -187
- bookstack_file_exporter-2.0.0/tests/unit/__init__.py +0 -2
- bookstack_file_exporter-2.0.0/tests/unit/test_archiver.py +0 -355
- bookstack_file_exporter-2.0.0/tests/unit/test_archiver_util.py +0 -196
- bookstack_file_exporter-2.0.0/tests/unit/test_asset_archiver_config.py +0 -124
- bookstack_file_exporter-2.0.0/tests/unit/test_asset_archiver_core.py +0 -253
- bookstack_file_exporter-2.0.0/tests/unit/test_asset_archiver_html.py +0 -245
- bookstack_file_exporter-2.0.0/tests/unit/test_asset_archiver_modify_html.py +0 -376
- bookstack_file_exporter-2.0.0/tests/unit/test_common_util.py +0 -44
- bookstack_file_exporter-2.0.0/tests/unit/test_http_helper.py +0 -255
- bookstack_file_exporter-2.0.0/tests/unit/test_node.py +0 -271
- bookstack_file_exporter-2.0.0/tests/unit/test_node_exporter.py +0 -328
- bookstack_file_exporter-2.0.0/tests/unit/test_page_archiver.py +0 -223
- {bookstack_file_exporter-2.0.0 → bookstack_file_exporter-2.2.0}/LICENSE +0 -0
- {bookstack_file_exporter-2.0.0 → bookstack_file_exporter-2.2.0}/bookstack_file_exporter/__init__.py +0 -0
- {bookstack_file_exporter-2.0.0 → bookstack_file_exporter-2.2.0}/bookstack_file_exporter/__main__.py +0 -0
- {bookstack_file_exporter-2.0.0 → bookstack_file_exporter-2.2.0}/bookstack_file_exporter/archiver/__init__.py +0 -0
- {bookstack_file_exporter-2.0.0 → bookstack_file_exporter-2.2.0}/bookstack_file_exporter/common/__init__.py +0 -0
- {bookstack_file_exporter-2.0.0 → bookstack_file_exporter-2.2.0}/bookstack_file_exporter/config_helper/__init__.py +0 -0
- {bookstack_file_exporter-2.0.0 → bookstack_file_exporter-2.2.0}/bookstack_file_exporter/exporter/__init__.py +0 -0
- {bookstack_file_exporter-2.0.0 → bookstack_file_exporter-2.2.0}/bookstack_file_exporter/notify/__init__.py +0 -0
- {bookstack_file_exporter-2.0.0 → bookstack_file_exporter-2.2.0}/bookstack_file_exporter/run_args.py +0 -0
- {bookstack_file_exporter-2.0.0 → bookstack_file_exporter-2.2.0}/bookstack_file_exporter.egg-info/dependency_links.txt +0 -0
- {bookstack_file_exporter-2.0.0 → bookstack_file_exporter-2.2.0}/bookstack_file_exporter.egg-info/entry_points.txt +0 -0
|
@@ -1,28 +1,23 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: bookstack-file-exporter
|
|
3
|
-
Version: 2.0.0
|
|
3
|
+
Version: 2.2.0
|
|
4
4
|
Summary: An exporter written in python to export all documents from a bookstack instance in different formats
|
|
5
|
-
Home-page: https://github.com/homeylab/bookstack-file-exporter
|
|
6
5
|
Author: pchang388
|
|
7
6
|
License: MIT License
|
|
7
|
+
Project-URL: Homepage, https://github.com/homeylab/bookstack-file-exporter
|
|
8
8
|
Keywords: bookstack,exporter
|
|
9
9
|
Classifier: License :: OSI Approved :: MIT License
|
|
10
10
|
Classifier: Programming Language :: Python :: 3
|
|
11
|
-
Requires-Python: >=3.8
|
|
11
|
+
Requires-Python: >=3.11
|
|
12
12
|
Description-Content-Type: text/markdown
|
|
13
13
|
License-File: LICENSE
|
|
14
|
-
Requires-Dist: Pyyaml>=6.0.2
|
|
14
|
+
Requires-Dist: Pyyaml>=6.0.3
|
|
15
15
|
Requires-Dist: Pydantic>=2.13.4
|
|
16
16
|
Requires-Dist: beautifulsoup4>=4.14.3
|
|
17
|
-
Requires-Dist: requests>=2.32.3
|
|
18
|
-
Requires-Dist: minio>=7.2.15
|
|
19
|
-
Requires-Dist: apprise>=1.9.4
|
|
20
|
-
Requires-Dist: markdown-it-py>=3.0.0
|
|
21
|
-
Provides-Extra: dev
|
|
22
|
-
Requires-Dist: pytest>=8.0; extra == "dev"
|
|
23
|
-
Requires-Dist: pytest-cov>=5.0; extra == "dev"
|
|
24
|
-
Requires-Dist: responses>=0.25; extra == "dev"
|
|
25
|
-
Requires-Dist: pylint; extra == "dev"
|
|
17
|
+
Requires-Dist: requests>=2.34.2
|
|
18
|
+
Requires-Dist: minio>=7.2.20
|
|
19
|
+
Requires-Dist: apprise>=1.10.0
|
|
20
|
+
Requires-Dist: markdown-it-py>=4.2.0
|
|
26
21
|
Dynamic: license-file
|
|
27
22
|
|
|
28
23
|
# bookstack-file-exporter
|
|
@@ -53,7 +48,7 @@ Table of Contents
|
|
|
53
48
|
## Background
|
|
54
49
|
_If you encounter any issues, want to request an additional feature, or provide assistance, feel free to open a Github issue._
|
|
55
50
|
|
|
56
|
-
This tool provides a way to export [Bookstack](https://github.com/BookStackApp/BookStack) pages and their content (_text, images, attachments, metadata, etc._) into a relational parent-child layout locally with an option to push to remote object storage locations. See [Backup Behavior](#backup-behavior) section for more details on how pages are organized. Image and attachment links can also be modified in markdown exports to point to local exported paths.
|
|
51
|
+
This tool provides a way to export [Bookstack](https://github.com/BookStackApp/BookStack) pages and their content (_text, images, attachments, metadata, etc._) into a relational parent-child layout locally with an option to push to remote object storage locations. See [Backup Behavior](#backup-behavior) section for more details on how pages are organized. Image and attachment links can also be modified in markdown and html exports to point to local exported paths.
|
|
57
52
|
|
|
58
53
|
This small project was mainly created to run as a cron job in k8s but works anywhere. This tool allows me to export my docs in markdown, or other formats like pdf. I use Bookstack's markdown editor as default instead of WYSIWYG editor and this makes my notes portable anywhere even if offline.
|
|
59
54
|
|
|
@@ -118,15 +113,18 @@ assets:
|
|
|
118
113
|
```
|
|
119
114
|
|
|
120
115
|
### Run via Pip
|
|
121
|
-
The exporter can be installed via pip and run directly.
|
|
116
|
+
The exporter can be installed via pip (or [uv](https://docs.astral.sh/uv/)) and run directly.
|
|
122
117
|
|
|
123
118
|
#### Python Version
|
|
124
|
-
_Note: This application is tested and developed on Python version `3.
|
|
119
|
+
_Note: This application is tested and developed on Python version `3.14.5`. The minimum required version is `3.11`, but installing (or setting up a venv with) version `3.14.5` is recommended._
|
|
125
120
|
|
|
126
121
|
#### Examples
|
|
127
122
|
```bash
|
|
128
123
|
python -m pip install bookstack-file-exporter
|
|
129
124
|
|
|
125
|
+
# or with uv:
|
|
126
|
+
uv pip install bookstack-file-exporter
|
|
127
|
+
|
|
130
128
|
# if you prefer a specific version, example:
|
|
131
129
|
python -m pip install bookstack-file-exporter==X.X.X
|
|
132
130
|
|
|
@@ -306,13 +304,14 @@ More descriptions can be found for each section below:
|
|
|
306
304
|
| `credentials` | `object` | `false` | Optional section where Bookstack tokenId and tokenSecret can be specified. Env variable for credentials may be supplied instead. See [Authentication](#authentication) for more details. |
|
|
307
305
|
| `credentials.token_id` | `str`| `false` if specified through env var instead, otherwise `true` | A valid Bookstack tokenId. |
|
|
308
306
|
| `credentials.token_secret` | `str` | `false` if specified through env var instead, otherwise `true` | A valid Bookstack tokenSecret. |
|
|
309
|
-
| `formats` | `list<str>` | `true` | Which export formats to use for
|
|
307
|
+
| `formats` | `list<str>` | `true` | Which export formats to use for BookStack content. Valid options are: `["markdown", "html", "pdf", "plaintext", "zip"]`|
|
|
308
|
+
| `export_level` | `str` | `false` | Optional (default: `pages`). Export granularity. See [Export Level](#export-level) for details. Valid options: `pages`, `books`, `chapters`. |
|
|
310
309
|
| `output_path` | `str` | `false` | Optional (default: `cwd`) which directory (relative or full path) to place exports. User who runs the command should have access to read/write to this directory. This directory and any parent directories will be attempted to be created if they do not exist. If not provided, will use current run directory by default. If using docker, this option can be omitted. |
|
|
311
310
|
| `assets` | `object` | `false` | Optional section to export additional assets from pages. |
|
|
312
311
|
| `assets.export_images` | `bool` | `false` | Optional (default: `false`), export all images for a page to an `image` directory within page directory. See [Backup Behavior](#backup-behavior) for more information on layout |
|
|
313
312
|
| `assets.export_attachments` | `bool` | `false` | Optional (default: `false`), export all attachments for a page to an `attachments` directory within page directory. See [Backup Behavior](#backup-behavior) for more information on layout |
|
|
314
313
|
| `assets.modify_links` | `bool` | `false` | Optional (default: `false`). Rewrites image and attachment URLs in markdown AND html exports to local relative paths. Requires `assets.export_images` and/or `assets.export_attachments` to be `true`. Only applies to `markdown` and `html` formats; pdf, plaintext, and zip are not eligible. Legacy key `modify_markdown` still accepted (deprecated); will be removed in a future version. See [Modify Links](#modify-links) for more information. |
|
|
315
|
-
| `assets.export_meta` | `bool` | `false` | Optional (default: `false`), export
|
|
314
|
+
| `assets.export_meta` | `bool` | `false` | Optional (default: `false`), export metadata about each archived page, book, or chapter in a json file. |
|
|
316
315
|
| `http_config` | `object` | `false` | Optional section to override default http configuration. |
|
|
317
316
|
| `http_config.verify_ssl` | `bool` | `false` | Optional (default: `false`), whether or not to verify ssl certificates if using https. |
|
|
318
317
|
| `http_config.timeout` | `int` | `false` | Optional (default: `30`), set the timeout, in seconds, for http requests. |
|
|
@@ -337,6 +336,26 @@ General
|
|
|
337
336
|
- `MINIO_ACCESS_KEY`
|
|
338
337
|
- `MINIO_SECRET_KEY`
|
|
339
338
|
|
|
339
|
+
## Export Level
|
|
340
|
+
|
|
341
|
+
The `export_level` configuration option controls the granularity of exports:
|
|
342
|
+
|
|
343
|
+
| Value | Description |
|
|
344
|
+
| ----- | ----------- |
|
|
345
|
+
| `pages` (default) | One file per page. Supports `assets.export_images`, `assets.export_attachments`, and `assets.modify_links`. |
|
|
346
|
+
| `books` | One combined file per book per format, written to a per-book folder (`<shelf>/<book>/<book>.<ext>`). Set `assets.modify_links: true` (with `export_images`/`export_attachments`) to download images/attachments locally and rewrite links to relative paths in `markdown` and `html`. `pdf` stays self-contained (assets embedded by Bookstack server-side). |
|
|
347
|
+
| `chapters` | One combined file per chapter per format, in a per-chapter folder (`<shelf>/<book>/<chapter>/<chapter>.<ext>`). Same `modify_links` support (markdown + html) as `books`. **Note:** pages not under any chapter are not captured at this level. |
|
|
348
|
+
|
|
349
|
+
**Example:** `formats: [pdf]` + `export_level: books` exports one PDF per book through the server-side BookStack API export.
|
|
350
|
+
|
|
351
|
+
**Empty nodes:** At `books` and `chapters` levels, a book or chapter with no child content is skipped — no file is written and the omission is logged at `INFO`. This keeps the archive free of empty placeholder documents.
|
|
352
|
+
|
|
353
|
+
The shelf/book/chapter hierarchy is preserved as directories inside the archive regardless of level — e.g. `books` produces `<shelf>/<book>/<book>.pdf` and `chapters` produces `<shelf>/<book>/<chapter>/<chapter>.pdf` (books without a shelf go under the unassigned directory).
|
|
354
|
+
|
|
355
|
+
`assets.export_meta` applies at all levels: when enabled, a `_meta.json` file is written alongside each exported node.
|
|
356
|
+
|
|
357
|
+
For non-default levels the archive filename is suffixed with the level (e.g. `bkps_books_<timestamp>.tgz`, `bkps_chapters_<timestamp>.tgz`); `pages` keeps the unsuffixed `bkps_<timestamp>.tgz`. Because `keep_last` cleanup matches on this prefix, archive retention is scoped independently per level.
|
|
358
|
+
|
|
340
359
|
## Backup Behavior
|
|
341
360
|
|
|
342
361
|
### General
|
|
@@ -477,7 +496,7 @@ If an API call to get an attachment or its metadata fails, the exporter will ski
|
|
|
477
496
|
|
|
478
497
|
The configuration item, `assets.modify_links`, can be set to `true` to rewrite image and attachment URL links in exported files to local relative paths. This feature makes your `markdown` and `html` exports fully portable — assets resolve locally without a network connection to the Bookstack instance.
|
|
479
498
|
|
|
480
|
-
- **Eligible formats**: `markdown` and `html` only. PDF, plaintext, and zip exports are not
|
|
499
|
+
- **Eligible formats**: `markdown` and `html` only. PDF, plaintext, and zip exports are not eligible; link rewriting for those formats has not yet been requested or implemented.
|
|
481
500
|
- **Scope**: rewrites image `src` attributes and their outer anchor `href` wrappers; rewrites attachment `<a href>` links. Does **not** rewrite inter-page, inter-book, inter-chapter, or inter-shelf links (deferred to a future issue).
|
|
482
501
|
- **Legacy alias**: the old key `modify_markdown` will be removed in a future version. Rename to `modify_links` in your configuration.
|
|
483
502
|
|
|
@@ -495,20 +514,31 @@ Page (parent) -> Images (children) relationships are created and then each image
|
|
|
495
514
|
|
|
496
515
|
#### HTML example
|
|
497
516
|
|
|
498
|
-
Bookstack HTML exports wrap images in an anchor tag (click-to-zoom). Both the
|
|
517
|
+
Bookstack HTML exports wrap images in an anchor tag (click-to-zoom). Both the
|
|
518
|
+
`<img src>` and the outer `<a href>` are rewritten to the same local file.
|
|
519
|
+
Images appear in one of two forms; both are localized:
|
|
499
520
|
|
|
500
521
|
```html
|
|
501
|
-
<!-- before:
|
|
522
|
+
<!-- before: remote "scaled" thumbnail src (older bookstack installations) -->
|
|
523
|
+
<a href="https://demo.bookstack/uploads/images/gallery/2023-07/pool-topology-1.png">
|
|
524
|
+
<img src="https://demo.bookstack/uploads/images/gallery/2023-07/scaled-1680-/pool-topology-1.png">
|
|
525
|
+
</a>
|
|
526
|
+
|
|
527
|
+
<!-- before: inline base64 src (recent bookstack installations) -->
|
|
502
528
|
<a href="https://demo.bookstack/uploads/images/gallery/2023-07/pool-topology-1.png">
|
|
503
529
|
<img src="data:image/png;base64,...">
|
|
504
530
|
</a>
|
|
505
531
|
|
|
506
|
-
<!-- after -->
|
|
532
|
+
<!-- after (both forms): src and href point at the one local file -->
|
|
507
533
|
<a href="images/{page_name}/pool-topology-1.png">
|
|
508
|
-
<img src="
|
|
534
|
+
<img src="images/{page_name}/pool-topology-1.png">
|
|
509
535
|
</a>
|
|
510
536
|
```
|
|
511
537
|
|
|
538
|
+
Inline base64 images are de-inlined to the local file (shrinking the export by
|
|
539
|
+
up to ~700 KB per full-size image). A base64 image **not** wrapped in a
|
|
540
|
+
downloadable anchor is left inline (it still resolves offline).
|
|
541
|
+
|
|
512
542
|
Attachment links are rewritten from the live URL to a local relative path.
|
|
513
543
|
|
|
514
544
|
```html
|
|
@@ -523,7 +553,7 @@ Attachment links are rewritten from the live URL to a local relative path.
|
|
|
523
553
|
|
|
524
554
|
Markdown exports use raw `bytes.replace` — no structural awareness (e.g. DOM). If an attachment URL for some reason appears verbatim anywhere in the markdown source (code block, pre, comment, plain text), it gets replaced.
|
|
525
555
|
|
|
526
|
-
HTML exports are safe because bs4 filters to only
|
|
556
|
+
HTML exports are safe because bs4 filters to only `<img src> / <a href>` attributes before replacing.
|
|
527
557
|
|
|
528
558
|
## Object Storage
|
|
529
559
|
Optionally, target(s) can be specified to upload generated archives to a remote location. Supported object storage providers can be found below:
|
|
@@ -614,24 +644,30 @@ Below are versions that have major changes to the way configuration or exporter
|
|
|
614
644
|
|
|
615
645
|
## Running Tests
|
|
616
646
|
|
|
617
|
-
|
|
647
|
+
This project uses [uv](https://docs.astral.sh/uv/) for development. Sync dev dependencies and run the test suite:
|
|
648
|
+
|
|
649
|
+
```bash
|
|
650
|
+
uv sync --all-groups
|
|
651
|
+
uv run pytest
|
|
652
|
+
```
|
|
653
|
+
|
|
654
|
+
Or via the [Taskfile](https://taskfile.dev) target:
|
|
618
655
|
|
|
619
656
|
```bash
|
|
620
|
-
|
|
621
|
-
pytest
|
|
657
|
+
task test
|
|
622
658
|
```
|
|
623
659
|
|
|
624
660
|
The pytest run includes coverage by default (configured in `pyproject.toml`). For an HTML coverage report:
|
|
625
661
|
|
|
626
662
|
```bash
|
|
627
|
-
pytest --cov-report=html
|
|
663
|
+
uv run pytest --cov-report=html
|
|
628
664
|
open htmlcov/index.html
|
|
629
665
|
```
|
|
630
666
|
|
|
631
667
|
To run only unit tests (skipping integration tests):
|
|
632
668
|
|
|
633
669
|
```bash
|
|
634
|
-
pytest tests/unit
|
|
670
|
+
uv run pytest tests/unit
|
|
635
671
|
```
|
|
636
672
|
|
|
637
673
|
To run only the integration tests:
|
|
@@ -1,30 +1,3 @@
|
|
|
1
|
-
Metadata-Version: 2.4
|
|
2
|
-
Name: bookstack-file-exporter
|
|
3
|
-
Version: 2.0.0
|
|
4
|
-
Summary: An exporter written in python to export all documents from a bookstack instance in different formats
|
|
5
|
-
Home-page: https://github.com/homeylab/bookstack-file-exporter
|
|
6
|
-
Author: pchang388
|
|
7
|
-
License: MIT License
|
|
8
|
-
Keywords: bookstack,exporter
|
|
9
|
-
Classifier: License :: OSI Approved :: MIT License
|
|
10
|
-
Classifier: Programming Language :: Python :: 3
|
|
11
|
-
Requires-Python: >=3.8
|
|
12
|
-
Description-Content-Type: text/markdown
|
|
13
|
-
License-File: LICENSE
|
|
14
|
-
Requires-Dist: Pyyaml>=6.0.2
|
|
15
|
-
Requires-Dist: Pydantic>=2.13.4
|
|
16
|
-
Requires-Dist: beautifulsoup4>=4.14.3
|
|
17
|
-
Requires-Dist: requests>=2.32.3
|
|
18
|
-
Requires-Dist: minio>=7.2.15
|
|
19
|
-
Requires-Dist: apprise>=1.9.4
|
|
20
|
-
Requires-Dist: markdown-it-py>=3.0.0
|
|
21
|
-
Provides-Extra: dev
|
|
22
|
-
Requires-Dist: pytest>=8.0; extra == "dev"
|
|
23
|
-
Requires-Dist: pytest-cov>=5.0; extra == "dev"
|
|
24
|
-
Requires-Dist: responses>=0.25; extra == "dev"
|
|
25
|
-
Requires-Dist: pylint; extra == "dev"
|
|
26
|
-
Dynamic: license-file
|
|
27
|
-
|
|
28
1
|
# bookstack-file-exporter
|
|
29
2
|
Table of Contents
|
|
30
3
|
- [bookstack-file-exporter](#bookstack-file-exporter)
|
|
@@ -53,7 +26,7 @@ Table of Contents
|
|
|
53
26
|
## Background
|
|
54
27
|
_If you encounter any issues, want to request an additional feature, or provide assistance, feel free to open a Github issue._
|
|
55
28
|
|
|
56
|
-
This tool provides a way to export [Bookstack](https://github.com/BookStackApp/BookStack) pages and their content (_text, images, attachments, metadata, etc._) into a relational parent-child layout locally with an option to push to remote object storage locations. See [Backup Behavior](#backup-behavior) section for more details on how pages are organized. Image and attachment links can also be modified in markdown exports to point to local exported paths.
|
|
29
|
+
This tool provides a way to export [Bookstack](https://github.com/BookStackApp/BookStack) pages and their content (_text, images, attachments, metadata, etc._) into a relational parent-child layout locally with an option to push to remote object storage locations. See [Backup Behavior](#backup-behavior) section for more details on how pages are organized. Image and attachment links can also be modified in markdown and html exports to point to local exported paths.
|
|
57
30
|
|
|
58
31
|
This small project was mainly created to run as a cron job in k8s but works anywhere. This tool allows me to export my docs in markdown, or other formats like pdf. I use Bookstack's markdown editor as default instead of WYSIWYG editor and this makes my notes portable anywhere even if offline.
|
|
59
32
|
|
|
@@ -118,15 +91,18 @@ assets:
|
|
|
118
91
|
```
|
|
119
92
|
|
|
120
93
|
### Run via Pip
|
|
121
|
-
The exporter can be installed via pip and run directly.
|
|
94
|
+
The exporter can be installed via pip (or [uv](https://docs.astral.sh/uv/)) and run directly.
|
|
122
95
|
|
|
123
96
|
#### Python Version
|
|
124
|
-
_Note: This application is tested and developed on Python version `3.
|
|
97
|
+
_Note: This application is tested and developed on Python version `3.14.5`. The minimum required version is `3.11`, but installing (or setting up a venv with) version `3.14.5` is recommended._
|
|
125
98
|
|
|
126
99
|
#### Examples
|
|
127
100
|
```bash
|
|
128
101
|
python -m pip install bookstack-file-exporter
|
|
129
102
|
|
|
103
|
+
# or with uv:
|
|
104
|
+
uv pip install bookstack-file-exporter
|
|
105
|
+
|
|
130
106
|
# if you prefer a specific version, example:
|
|
131
107
|
python -m pip install bookstack-file-exporter==X.X.X
|
|
132
108
|
|
|
@@ -306,13 +282,14 @@ More descriptions can be found for each section below:
|
|
|
306
282
|
| `credentials` | `object` | `false` | Optional section where Bookstack tokenId and tokenSecret can be specified. Env variable for credentials may be supplied instead. See [Authentication](#authentication) for more details. |
|
|
307
283
|
| `credentials.token_id` | `str`| `false` if specified through env var instead, otherwise `true` | A valid Bookstack tokenId. |
|
|
308
284
|
| `credentials.token_secret` | `str` | `false` if specified through env var instead, otherwise `true` | A valid Bookstack tokenSecret. |
|
|
309
|
-
| `formats` | `list<str>` | `true` | Which export formats to use for
|
|
285
|
+
| `formats` | `list<str>` | `true` | Which export formats to use for BookStack content. Valid options are: `["markdown", "html", "pdf", "plaintext", "zip"]`|
|
|
286
|
+
| `export_level` | `str` | `false` | Optional (default: `pages`). Export granularity. See [Export Level](#export-level) for details. Valid options: `pages`, `books`, `chapters`. |
|
|
310
287
|
| `output_path` | `str` | `false` | Optional (default: `cwd`) which directory (relative or full path) to place exports. User who runs the command should have access to read/write to this directory. This directory and any parent directories will be attempted to be created if they do not exist. If not provided, will use current run directory by default. If using docker, this option can be omitted. |
|
|
311
288
|
| `assets` | `object` | `false` | Optional section to export additional assets from pages. |
|
|
312
289
|
| `assets.export_images` | `bool` | `false` | Optional (default: `false`), export all images for a page to an `image` directory within page directory. See [Backup Behavior](#backup-behavior) for more information on layout |
|
|
313
290
|
| `assets.export_attachments` | `bool` | `false` | Optional (default: `false`), export all attachments for a page to an `attachments` directory within page directory. See [Backup Behavior](#backup-behavior) for more information on layout |
|
|
314
291
|
| `assets.modify_links` | `bool` | `false` | Optional (default: `false`). Rewrites image and attachment URLs in markdown AND html exports to local relative paths. Requires `assets.export_images` and/or `assets.export_attachments` to be `true`. Only applies to `markdown` and `html` formats; pdf, plaintext, and zip are not eligible. Legacy key `modify_markdown` still accepted (deprecated); will be removed in a future version. See [Modify Links](#modify-links) for more information. |
|
|
315
|
-
| `assets.export_meta` | `bool` | `false` | Optional (default: `false`), export
|
|
292
|
+
| `assets.export_meta` | `bool` | `false` | Optional (default: `false`), export metadata about each archived page, book, or chapter in a json file. |
|
|
316
293
|
| `http_config` | `object` | `false` | Optional section to override default http configuration. |
|
|
317
294
|
| `http_config.verify_ssl` | `bool` | `false` | Optional (default: `false`), whether or not to verify ssl certificates if using https. |
|
|
318
295
|
| `http_config.timeout` | `int` | `false` | Optional (default: `30`), set the timeout, in seconds, for http requests. |
|
|
@@ -337,6 +314,26 @@ General
|
|
|
337
314
|
- `MINIO_ACCESS_KEY`
|
|
338
315
|
- `MINIO_SECRET_KEY`
|
|
339
316
|
|
|
317
|
+
## Export Level
|
|
318
|
+
|
|
319
|
+
The `export_level` configuration option controls the granularity of exports:
|
|
320
|
+
|
|
321
|
+
| Value | Description |
|
|
322
|
+
| ----- | ----------- |
|
|
323
|
+
| `pages` (default) | One file per page. Supports `assets.export_images`, `assets.export_attachments`, and `assets.modify_links`. |
|
|
324
|
+
| `books` | One combined file per book per format, written to a per-book folder (`<shelf>/<book>/<book>.<ext>`). Set `assets.modify_links: true` (with `export_images`/`export_attachments`) to download images/attachments locally and rewrite links to relative paths in `markdown` and `html`. `pdf` stays self-contained (assets embedded by Bookstack server-side). |
|
|
325
|
+
| `chapters` | One combined file per chapter per format, in a per-chapter folder (`<shelf>/<book>/<chapter>/<chapter>.<ext>`). Same `modify_links` support (markdown + html) as `books`. **Note:** pages not under any chapter are not captured at this level. |
|
|
326
|
+
|
|
327
|
+
**Example:** `formats: [pdf]` + `export_level: books` exports one PDF per book through the server-side BookStack API export.
|
|
328
|
+
|
|
329
|
+
**Empty nodes:** At `books` and `chapters` levels, a book or chapter with no child content is skipped — no file is written and the omission is logged at `INFO`. This keeps the archive free of empty placeholder documents.
|
|
330
|
+
|
|
331
|
+
The shelf/book/chapter hierarchy is preserved as directories inside the archive regardless of level — e.g. `books` produces `<shelf>/<book>/<book>.pdf` and `chapters` produces `<shelf>/<book>/<chapter>/<chapter>.pdf` (books without a shelf go under the unassigned directory).
|
|
332
|
+
|
|
333
|
+
`assets.export_meta` applies at all levels: when enabled, a `_meta.json` file is written alongside each exported node.
|
|
334
|
+
|
|
335
|
+
For non-default levels the archive filename is suffixed with the level (e.g. `bkps_books_<timestamp>.tgz`, `bkps_chapters_<timestamp>.tgz`); `pages` keeps the unsuffixed `bkps_<timestamp>.tgz`. Because `keep_last` cleanup matches on this prefix, archive retention is scoped independently per level.
|
|
336
|
+
|
|
340
337
|
## Backup Behavior
|
|
341
338
|
|
|
342
339
|
### General
|
|
@@ -477,7 +474,7 @@ If an API call to get an attachment or its metadata fails, the exporter will ski
|
|
|
477
474
|
|
|
478
475
|
The configuration item, `assets.modify_links`, can be set to `true` to rewrite image and attachment URL links in exported files to local relative paths. This feature makes your `markdown` and `html` exports fully portable — assets resolve locally without a network connection to the Bookstack instance.
|
|
479
476
|
|
|
480
|
-
- **Eligible formats**: `markdown` and `html` only. PDF, plaintext, and zip exports are not
|
|
477
|
+
- **Eligible formats**: `markdown` and `html` only. PDF, plaintext, and zip exports are not eligible; link rewriting for those formats has not yet been requested or implemented.
|
|
481
478
|
- **Scope**: rewrites image `src` attributes and their outer anchor `href` wrappers; rewrites attachment `<a href>` links. Does **not** rewrite inter-page, inter-book, inter-chapter, or inter-shelf links (deferred to a future issue).
|
|
482
479
|
- **Legacy alias**: the old key `modify_markdown` will be removed in a future version. Rename to `modify_links` in your configuration.
|
|
483
480
|
|
|
@@ -495,20 +492,31 @@ Page (parent) -> Images (children) relationships are created and then each image
|
|
|
495
492
|
|
|
496
493
|
#### HTML example
|
|
497
494
|
|
|
498
|
-
Bookstack HTML exports wrap images in an anchor tag (click-to-zoom). Both the
|
|
495
|
+
Bookstack HTML exports wrap images in an anchor tag (click-to-zoom). Both the
|
|
496
|
+
`<img src>` and the outer `<a href>` are rewritten to the same local file.
|
|
497
|
+
Images appear in one of two forms; both are localized:
|
|
499
498
|
|
|
500
499
|
```html
|
|
501
|
-
<!-- before:
|
|
500
|
+
<!-- before: remote "scaled" thumbnail src (older bookstack installations) -->
|
|
501
|
+
<a href="https://demo.bookstack/uploads/images/gallery/2023-07/pool-topology-1.png">
|
|
502
|
+
<img src="https://demo.bookstack/uploads/images/gallery/2023-07/scaled-1680-/pool-topology-1.png">
|
|
503
|
+
</a>
|
|
504
|
+
|
|
505
|
+
<!-- before: inline base64 src (recent bookstack installations) -->
|
|
502
506
|
<a href="https://demo.bookstack/uploads/images/gallery/2023-07/pool-topology-1.png">
|
|
503
507
|
<img src="data:image/png;base64,...">
|
|
504
508
|
</a>
|
|
505
509
|
|
|
506
|
-
<!-- after -->
|
|
510
|
+
<!-- after (both forms): src and href point at the one local file -->
|
|
507
511
|
<a href="images/{page_name}/pool-topology-1.png">
|
|
508
|
-
<img src="
|
|
512
|
+
<img src="images/{page_name}/pool-topology-1.png">
|
|
509
513
|
</a>
|
|
510
514
|
```
|
|
511
515
|
|
|
516
|
+
Inline base64 images are de-inlined to the local file (shrinking the export by
|
|
517
|
+
up to ~700 KB per full-size image). A base64 image **not** wrapped in a
|
|
518
|
+
downloadable anchor is left inline (it still resolves offline).
|
|
519
|
+
|
|
512
520
|
Attachment links are rewritten from the live URL to a local relative path.
|
|
513
521
|
|
|
514
522
|
```html
|
|
@@ -523,7 +531,7 @@ Attachment links are rewritten from the live URL to a local relative path.
|
|
|
523
531
|
|
|
524
532
|
Markdown exports use a raw `bytes.replace` with no structural awareness (there is no DOM to consult). If an attachment URL appears verbatim anywhere in the markdown source (code block, `pre`, comment, or plain text), that occurrence is replaced as well.
|
|
525
533
|
|
|
526
|
-
HTML exports are safe because bs4 filters to only
|
|
534
|
+
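HTML exports are not affected by this: bs4 (BeautifulSoup) restricts the replacement to `<img src>` and `<a href>` attribute values, so a URL that appears anywhere else in the document is left untouched.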
|
|
527
535
|
|
|
528
536
|
## Object Storage
|
|
529
537
|
Optionally, target(s) can be specified to upload generated archives to a remote location. Supported object storage providers can be found below:
|
|
@@ -614,24 +622,30 @@ Below are versions that have major changes to the way configuration or exporter
|
|
|
614
622
|
|
|
615
623
|
## Running Tests
|
|
616
624
|
|
|
617
|
-
|
|
625
|
+
This project uses [uv](https://docs.astral.sh/uv/) for development. Sync dev dependencies and run the test suite:
|
|
626
|
+
|
|
627
|
+
```bash
|
|
628
|
+
uv sync --all-groups
|
|
629
|
+
uv run pytest
|
|
630
|
+
```
|
|
631
|
+
|
|
632
|
+
Or via the [Taskfile](https://taskfile.dev) target:
|
|
618
633
|
|
|
619
634
|
```bash
|
|
620
|
-
|
|
621
|
-
pytest
|
|
635
|
+
task test
|
|
622
636
|
```
|
|
623
637
|
|
|
624
638
|
The pytest run includes coverage by default (configured in `pyproject.toml`). For an HTML coverage report:
|
|
625
639
|
|
|
626
640
|
```bash
|
|
627
|
-
pytest --cov-report=html
|
|
641
|
+
uv run pytest --cov-report=html
|
|
628
642
|
open htmlcov/index.html
|
|
629
643
|
```
|
|
630
644
|
|
|
631
645
|
To run only unit tests (skipping integration tests):
|
|
632
646
|
|
|
633
647
|
```bash
|
|
634
|
-
pytest tests/unit
|
|
648
|
+
uv run pytest tests/unit
|
|
635
649
|
```
|
|
636
650
|
|
|
637
651
|
To run only the integration tests:
|
|
@@ -1,11 +1,15 @@
|
|
|
1
|
-
from typing import List, Dict
|
|
2
1
|
from datetime import datetime
|
|
3
2
|
import logging
|
|
4
3
|
import os
|
|
5
4
|
|
|
6
5
|
from bookstack_file_exporter.exporter.node import Node
|
|
7
6
|
from bookstack_file_exporter.archiver import util
|
|
8
|
-
from bookstack_file_exporter.archiver.
|
|
7
|
+
from bookstack_file_exporter.archiver.node_archiver import (
|
|
8
|
+
NodeArchiver,
|
|
9
|
+
BookArchiver,
|
|
10
|
+
ChapterArchiver,
|
|
11
|
+
PageArchiver,
|
|
12
|
+
)
|
|
9
13
|
from bookstack_file_exporter.archiver.minio_archiver import MinioArchiver
|
|
10
14
|
from bookstack_file_exporter.config_helper.remote import StorageProviderConfig
|
|
11
15
|
from bookstack_file_exporter.config_helper.config_helper import ConfigNode
|
|
@@ -18,7 +22,7 @@ _DATE_STR_FORMAT = "%Y-%m-%d_%H-%M-%S"
|
|
|
18
22
|
# pylint: disable=too-many-instance-attributes
|
|
19
23
|
class Archiver:
|
|
20
24
|
"""
|
|
21
|
-
Archiver helps handle archive duties: pulls all the necessary files from upstream
|
|
25
|
+
Archiver helps handle archive duties: pulls all the necessary files from upstream
|
|
22
26
|
and then pushes them to the specified backup location(s)
|
|
23
27
|
|
|
24
28
|
Args:
|
|
@@ -26,15 +30,41 @@ class Archiver:
|
|
|
26
30
|
:http_client: <HttpHelper> = http helper functions with config from user inputs
|
|
27
31
|
|
|
28
32
|
Returns:
|
|
29
|
-
Archiver instance with attributes that are accessible
|
|
33
|
+
Archiver instance with attributes that are accessible
|
|
30
34
|
for use for handling bookstack exports and remote uploads.
|
|
31
35
|
"""
|
|
32
36
|
def __init__(self, config: ConfigNode, http_client: HttpHelper):
|
|
33
37
|
self.config = config
|
|
34
38
|
# for convenience
|
|
35
|
-
self.base_dir = config.base_dir_name
|
|
39
|
+
self.base_dir = self._level_base_dir(config.base_dir_name,
|
|
40
|
+
config.user_inputs.export_level)
|
|
36
41
|
self.archive_dir = self._generate_root_folder(self.base_dir)
|
|
37
|
-
self.
|
|
42
|
+
self._archiver: NodeArchiver = self._build_archiver(http_client)
|
|
43
|
+
|
|
44
|
+
def _build_archiver(self, http_client: HttpHelper) -> NodeArchiver:
    """Pick the node archiver that matches the configured export level.

    `books` and `chapters` each get their dedicated archiver, both built
    from the same keyword arguments. Any other level falls through to the
    page archiver, which is the default.
    """
    user_inputs = self.config.user_inputs
    level = user_inputs.export_level
    # read before the level check so it is evaluated for every level
    meta_enabled: bool = user_inputs.assets.export_meta
    if level == "books":
        archiver_cls = BookArchiver
    elif level == "chapters":
        archiver_cls = ChapterArchiver
    else:
        # default: "pages"
        return PageArchiver(self.archive_dir, self.config, http_client)
    return archiver_cls(
        archive_dir=self.archive_dir,
        api_urls=self.config.urls,
        export_formats=user_inputs.formats,
        http_client=http_client,
        export_meta=meta_enabled,
        asset_config=user_inputs.assets,
    )
|
|
38
68
|
|
|
39
69
|
def create_export_dir(self):
|
|
40
70
|
"""create directory for archiving"""
|
|
@@ -52,17 +82,23 @@ class Archiver:
|
|
|
52
82
|
"attempting to skip this step")
|
|
53
83
|
return
|
|
54
84
|
|
|
55
|
-
def get_bookstack_exports(self,
|
|
56
|
-
"""export all
|
|
57
|
-
log.info("Exporting all bookstack
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
85
|
+
def get_bookstack_exports(self, nodes: dict[int, Node]):
    """Export the content of every node in `nodes`.

    The work is handed to the level-specific archiver stored in
    `self._archiver`, so one call serves pages, books, or chapters.
    """
    log.info("Exporting all bookstack contents")
    run_export = self._archiver.archive
    run_export(nodes)
|
|
89
|
+
|
|
90
|
+
@property
def has_exported_content(self) -> bool:
    """Report whether the intermediate tar is present on disk.

    The answer comes from the filesystem itself instead of a flag passed
    up from the archivers, so it always reflects what was really written:
    the tar exists only if at least one file was archived.
    """
    tar_path = self._archiver.tar_file
    return os.path.exists(tar_path)
|
|
61
98
|
|
|
62
99
|
def create_archive(self):
|
|
63
100
|
"""create tgz archive"""
|
|
64
|
-
|
|
65
|
-
self._page_archiver.gzip_archive()
|
|
101
|
+
self._archiver.gzip_archive()
|
|
66
102
|
|
|
67
103
|
# send to remote systems
|
|
68
104
|
def archive_remote(self):
|
|
@@ -84,8 +120,8 @@ class Archiver:
|
|
|
84
120
|
def _archive_minio(self, obj_config: StorageProviderConfig):
|
|
85
121
|
minio_archiver = MinioArchiver(obj_config.access_key,
|
|
86
122
|
obj_config.secret_key, obj_config.config)
|
|
87
|
-
minio_archiver.upload_backup(self.
|
|
88
|
-
minio_archiver.clean_up(self.
|
|
123
|
+
minio_archiver.upload_backup(self._archiver.archive_file)
|
|
124
|
+
minio_archiver.clean_up(self._archiver.file_extension_map['tgz'])
|
|
89
125
|
|
|
90
126
|
def _archive_s3(self, obj_config: StorageProviderConfig):
|
|
91
127
|
raise NotImplementedError("S3 remote storage is not yet implemented")
|
|
@@ -99,11 +135,11 @@ class Archiver:
|
|
|
99
135
|
if to_delete:
|
|
100
136
|
self._delete_files(to_delete)
|
|
101
137
|
|
|
102
|
-
def _get_stale_archives(self) ->
|
|
138
|
+
def _get_stale_archives(self) -> list[str]:
|
|
103
139
|
# if user is uploading to object storage
|
|
104
140
|
# delete the local .tgz archive since we have it there already
|
|
105
|
-
archive_list:
|
|
106
|
-
self.
|
|
141
|
+
archive_list: list[str] = util.scan_archives(self.base_dir,
|
|
142
|
+
self._archiver.file_extension_map['tgz'])
|
|
107
143
|
if not archive_list:
|
|
108
144
|
log.debug("No archive files found to clean up")
|
|
109
145
|
return []
|
|
@@ -120,7 +156,7 @@ class Archiver:
|
|
|
120
156
|
to_delete = self._filter_archives(archive_list)
|
|
121
157
|
return to_delete
|
|
122
158
|
|
|
123
|
-
def _filter_archives(self, file_list:
|
|
159
|
+
def _filter_archives(self, file_list: list[str]) -> list[str]:
|
|
124
160
|
"""get older archives based on keep number"""
|
|
125
161
|
file_dict = {file: os.stat(file).st_ctime for file in file_list}
|
|
126
162
|
ordered = sorted(file_dict.items(), key=lambda item: item[1])
|
|
@@ -134,10 +170,22 @@ class Archiver:
|
|
|
134
170
|
log.debug("%d local archives will be cleaned up", len(files_to_clean))
|
|
135
171
|
return files_to_clean
|
|
136
172
|
|
|
137
|
-
def _delete_files(self, file_list:
|
|
173
|
+
def _delete_files(self, file_list: list[str]):
    """Remove every path in `file_list`, one at a time, via `util.remove_file`."""
    for stale_path in iter(file_list):
        util.remove_file(stale_path)
|
|
140
176
|
|
|
177
|
+
@staticmethod
|
|
178
|
+
def _level_base_dir(base_dir: str, export_level: str) -> str:
|
|
179
|
+
"""Append the export level to the archive base name for non-default levels.
|
|
180
|
+
|
|
181
|
+
`pages` (the default) stays byte-identical to prior behavior; `books` and
|
|
182
|
+
`chapters` get a distinguishable name (e.g. `bkps_books`). Because keep_last
|
|
183
|
+
cleanup globs on this base, retention is naturally scoped per level.
|
|
184
|
+
"""
|
|
185
|
+
if export_level == "pages":
|
|
186
|
+
return base_dir
|
|
187
|
+
return f"{base_dir}_{export_level}"
|
|
188
|
+
|
|
141
189
|
@staticmethod
|
|
142
190
|
def _generate_root_folder(base_folder_name: str) -> str:
|
|
143
191
|
"""return base archive name"""
|