bookstack-file-exporter 2.0.0__tar.gz → 2.2.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (59) hide show
  1. {bookstack_file_exporter-2.0.0/bookstack_file_exporter.egg-info → bookstack_file_exporter-2.2.0}/PKG-INFO +65 -29
  2. bookstack_file_exporter-2.0.0/PKG-INFO → bookstack_file_exporter-2.2.0/README.md +57 -43
  3. {bookstack_file_exporter-2.0.0 → bookstack_file_exporter-2.2.0}/bookstack_file_exporter/archiver/archiver.py +69 -21
  4. {bookstack_file_exporter-2.0.0 → bookstack_file_exporter-2.2.0}/bookstack_file_exporter/archiver/asset_archiver.py +55 -41
  5. {bookstack_file_exporter-2.0.0 → bookstack_file_exporter-2.2.0}/bookstack_file_exporter/archiver/minio_archiver.py +7 -12
  6. bookstack_file_exporter-2.2.0/bookstack_file_exporter/archiver/node_archiver.py +406 -0
  7. {bookstack_file_exporter-2.0.0 → bookstack_file_exporter-2.2.0}/bookstack_file_exporter/archiver/util.py +1 -2
  8. {bookstack_file_exporter-2.0.0 → bookstack_file_exporter-2.2.0}/bookstack_file_exporter/common/util.py +45 -42
  9. {bookstack_file_exporter-2.0.0 → bookstack_file_exporter-2.2.0}/bookstack_file_exporter/config_helper/config_helper.py +15 -50
  10. bookstack_file_exporter-2.2.0/bookstack_file_exporter/config_helper/models.py +79 -0
  11. {bookstack_file_exporter-2.0.0 → bookstack_file_exporter-2.2.0}/bookstack_file_exporter/config_helper/notifications.py +2 -6
  12. {bookstack_file_exporter-2.0.0 → bookstack_file_exporter-2.2.0}/bookstack_file_exporter/config_helper/remote.py +1 -1
  13. {bookstack_file_exporter-2.0.0 → bookstack_file_exporter-2.2.0}/bookstack_file_exporter/exporter/exporter.py +22 -25
  14. {bookstack_file_exporter-2.0.0 → bookstack_file_exporter-2.2.0}/bookstack_file_exporter/exporter/node.py +3 -7
  15. {bookstack_file_exporter-2.0.0 → bookstack_file_exporter-2.2.0}/bookstack_file_exporter/notify/handler.py +1 -2
  16. {bookstack_file_exporter-2.0.0 → bookstack_file_exporter-2.2.0}/bookstack_file_exporter/notify/notifiers.py +2 -3
  17. {bookstack_file_exporter-2.0.0 → bookstack_file_exporter-2.2.0}/bookstack_file_exporter/run.py +37 -21
  18. bookstack_file_exporter-2.0.0/README.md → bookstack_file_exporter-2.2.0/bookstack_file_exporter.egg-info/PKG-INFO +80 -17
  19. {bookstack_file_exporter-2.0.0 → bookstack_file_exporter-2.2.0}/bookstack_file_exporter.egg-info/SOURCES.txt +2 -23
  20. bookstack_file_exporter-2.2.0/bookstack_file_exporter.egg-info/requires.txt +7 -0
  21. {bookstack_file_exporter-2.0.0 → bookstack_file_exporter-2.2.0}/bookstack_file_exporter.egg-info/top_level.txt +0 -1
  22. bookstack_file_exporter-2.2.0/pyproject.toml +63 -0
  23. bookstack_file_exporter-2.2.0/setup.cfg +4 -0
  24. bookstack_file_exporter-2.0.0/bookstack_file_exporter/archiver/page_archiver.py +0 -221
  25. bookstack_file_exporter-2.0.0/bookstack_file_exporter/config_helper/models.py +0 -73
  26. bookstack_file_exporter-2.0.0/bookstack_file_exporter.egg-info/requires.txt +0 -13
  27. bookstack_file_exporter-2.0.0/pyproject.toml +0 -23
  28. bookstack_file_exporter-2.0.0/setup.cfg +0 -41
  29. bookstack_file_exporter-2.0.0/tests/__init__.py +0 -2
  30. bookstack_file_exporter-2.0.0/tests/conftest.py +0 -165
  31. bookstack_file_exporter-2.0.0/tests/fixtures/__init__.py +0 -2
  32. bookstack_file_exporter-2.0.0/tests/fixtures/mock_config.py +0 -19
  33. bookstack_file_exporter-2.0.0/tests/helpers.py +0 -10
  34. bookstack_file_exporter-2.0.0/tests/integration/__init__.py +0 -2
  35. bookstack_file_exporter-2.0.0/tests/integration/test_empty_bookstack.py +0 -30
  36. bookstack_file_exporter-2.0.0/tests/integration/test_full_traversal.py +0 -187
  37. bookstack_file_exporter-2.0.0/tests/unit/__init__.py +0 -2
  38. bookstack_file_exporter-2.0.0/tests/unit/test_archiver.py +0 -355
  39. bookstack_file_exporter-2.0.0/tests/unit/test_archiver_util.py +0 -196
  40. bookstack_file_exporter-2.0.0/tests/unit/test_asset_archiver_config.py +0 -124
  41. bookstack_file_exporter-2.0.0/tests/unit/test_asset_archiver_core.py +0 -253
  42. bookstack_file_exporter-2.0.0/tests/unit/test_asset_archiver_html.py +0 -245
  43. bookstack_file_exporter-2.0.0/tests/unit/test_asset_archiver_modify_html.py +0 -376
  44. bookstack_file_exporter-2.0.0/tests/unit/test_common_util.py +0 -44
  45. bookstack_file_exporter-2.0.0/tests/unit/test_http_helper.py +0 -255
  46. bookstack_file_exporter-2.0.0/tests/unit/test_node.py +0 -271
  47. bookstack_file_exporter-2.0.0/tests/unit/test_node_exporter.py +0 -328
  48. bookstack_file_exporter-2.0.0/tests/unit/test_page_archiver.py +0 -223
  49. {bookstack_file_exporter-2.0.0 → bookstack_file_exporter-2.2.0}/LICENSE +0 -0
  50. {bookstack_file_exporter-2.0.0 → bookstack_file_exporter-2.2.0}/bookstack_file_exporter/__init__.py +0 -0
  51. {bookstack_file_exporter-2.0.0 → bookstack_file_exporter-2.2.0}/bookstack_file_exporter/__main__.py +0 -0
  52. {bookstack_file_exporter-2.0.0 → bookstack_file_exporter-2.2.0}/bookstack_file_exporter/archiver/__init__.py +0 -0
  53. {bookstack_file_exporter-2.0.0 → bookstack_file_exporter-2.2.0}/bookstack_file_exporter/common/__init__.py +0 -0
  54. {bookstack_file_exporter-2.0.0 → bookstack_file_exporter-2.2.0}/bookstack_file_exporter/config_helper/__init__.py +0 -0
  55. {bookstack_file_exporter-2.0.0 → bookstack_file_exporter-2.2.0}/bookstack_file_exporter/exporter/__init__.py +0 -0
  56. {bookstack_file_exporter-2.0.0 → bookstack_file_exporter-2.2.0}/bookstack_file_exporter/notify/__init__.py +0 -0
  57. {bookstack_file_exporter-2.0.0 → bookstack_file_exporter-2.2.0}/bookstack_file_exporter/run_args.py +0 -0
  58. {bookstack_file_exporter-2.0.0 → bookstack_file_exporter-2.2.0}/bookstack_file_exporter.egg-info/dependency_links.txt +0 -0
  59. {bookstack_file_exporter-2.0.0 → bookstack_file_exporter-2.2.0}/bookstack_file_exporter.egg-info/entry_points.txt +0 -0
@@ -1,28 +1,23 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: bookstack-file-exporter
3
- Version: 2.0.0
3
+ Version: 2.2.0
4
4
  Summary: An exporter written in python to export all documents from a bookstack instance in different formats
5
- Home-page: https://github.com/homeylab/bookstack-file-exporter
6
5
  Author: pchang388
7
6
  License: MIT License
7
+ Project-URL: Homepage, https://github.com/homeylab/bookstack-file-exporter
8
8
  Keywords: bookstack,exporter
9
9
  Classifier: License :: OSI Approved :: MIT License
10
10
  Classifier: Programming Language :: Python :: 3
11
- Requires-Python: >=3.8
11
+ Requires-Python: >=3.11
12
12
  Description-Content-Type: text/markdown
13
13
  License-File: LICENSE
14
- Requires-Dist: Pyyaml>=6.0.2
14
+ Requires-Dist: Pyyaml>=6.0.3
15
15
  Requires-Dist: Pydantic>=2.13.4
16
16
  Requires-Dist: beautifulsoup4>=4.14.3
17
- Requires-Dist: requests>=2.32.3
18
- Requires-Dist: minio>=7.2.15
19
- Requires-Dist: apprise>=1.9.4
20
- Requires-Dist: markdown-it-py>=3.0.0
21
- Provides-Extra: dev
22
- Requires-Dist: pytest>=8.0; extra == "dev"
23
- Requires-Dist: pytest-cov>=5.0; extra == "dev"
24
- Requires-Dist: responses>=0.25; extra == "dev"
25
- Requires-Dist: pylint; extra == "dev"
17
+ Requires-Dist: requests>=2.34.2
18
+ Requires-Dist: minio>=7.2.20
19
+ Requires-Dist: apprise>=1.10.0
20
+ Requires-Dist: markdown-it-py>=4.2.0
26
21
  Dynamic: license-file
27
22
 
28
23
  # bookstack-file-exporter
@@ -53,7 +48,7 @@ Table of Contents
53
48
  ## Background
54
49
  _If you encounter any issues, want to request an additional feature, or provide assistance, feel free to open a Github issue._
55
50
 
56
- This tool provides a way to export [Bookstack](https://github.com/BookStackApp/BookStack) pages and their content (_text, images, attachments, metadata, etc._) into a relational parent-child layout locally with an option to push to remote object storage locations. See [Backup Behavior](#backup-behavior) section for more details on how pages are organized. Image and attachment links can also be modified in markdown exports to point to local exported paths.
51
+ This tool provides a way to export [Bookstack](https://github.com/BookStackApp/BookStack) pages and their content (_text, images, attachments, metadata, etc._) into a relational parent-child layout locally with an option to push to remote object storage locations. See [Backup Behavior](#backup-behavior) section for more details on how pages are organized. Image and attachment links can also be modified in markdown and html exports to point to local exported paths.
57
52
 
58
53
  This small project was mainly created to run as a cron job in k8s but works anywhere. This tool allows me to export my docs in markdown, or other formats like pdf. I use Bookstack's markdown editor as default instead of WYSIWYG editor and this makes my notes portable anywhere even if offline.
59
54
 
@@ -118,15 +113,18 @@ assets:
118
113
  ```
119
114
 
120
115
  ### Run via Pip
121
- The exporter can be installed via pip and run directly.
116
+ The exporter can be installed via pip (or [uv](https://docs.astral.sh/uv/)) and run directly.
122
117
 
123
118
  #### Python Version
124
- _Note: This application is tested and developed on Python version `3.13.2`. The min required version is >= `3.8` but is recommended to install (or set up a venv) a `3.13.2` version._
119
+ _Note: This application is tested and developed on Python version `3.14.5`. The minimum required version is `3.11`, but it is recommended to install (or set up a venv with) version `3.14.5`._
125
120
 
126
121
  #### Examples
127
122
  ```bash
128
123
  python -m pip install bookstack-file-exporter
129
124
 
125
+ # or with uv:
126
+ uv pip install bookstack-file-exporter
127
+
130
128
  # if you prefer a specific version, example:
131
129
  python -m pip install bookstack-file-exporter==X.X.X
132
130
 
@@ -306,13 +304,14 @@ More descriptions can be found for each section below:
306
304
  | `credentials` | `object` | `false` | Optional section where Bookstack tokenId and tokenSecret can be specified. Env variable for credentials may be supplied instead. See [Authentication](#authentication) for more details. |
307
305
  | `credentials.token_id` | `str`| `false` if specified through env var instead, otherwise `true` | A valid Bookstack tokenId. |
308
306
  | `credentials.token_secret` | `str` | `false` if specified through env var instead, otherwise `true` | A valid Bookstack tokenSecret. |
309
- | `formats` | `list<str>` | `true` | Which export formats to use for Bookstack page content. Valid options are: `["markdown", "html", "pdf", "plaintext", "zip"]`|
307
+ | `formats` | `list<str>` | `true` | Which export formats to use for BookStack content. Valid options are: `["markdown", "html", "pdf", "plaintext", "zip"]`|
308
+ | `export_level` | `str` | `false` | Optional (default: `pages`). Export granularity. See [Export Level](#export-level) for details. Valid options: `pages`, `books`, `chapters`. |
310
309
  | `output_path` | `str` | `false` | Optional (default: `cwd`) which directory (relative or full path) to place exports. User who runs the command should have access to read/write to this directory. This directory and any parent directories will be attempted to be created if they do not exist. If not provided, will use current run directory by default. If using docker, this option can be omitted. |
311
310
  | `assets` | `object` | `false` | Optional section to export additional assets from pages. |
312
311
  | `assets.export_images` | `bool` | `false` | Optional (default: `false`), export all images for a page to an `image` directory within page directory. See [Backup Behavior](#backup-behavior) for more information on layout |
313
312
  | `assets.export_attachments` | `bool` | `false` | Optional (default: `false`), export all attachments for a page to an `attachments` directory within page directory. See [Backup Behavior](#backup-behavior) for more information on layout |
314
313
  | `assets.modify_links` | `bool` | `false` | Optional (default: `false`). Rewrites image and attachment URLs in markdown AND html exports to local relative paths. Requires `assets.export_images` and/or `assets.export_attachments` to be `true`. Only applies to `markdown` and `html` formats; pdf, plaintext, and zip are not eligible. Legacy key `modify_markdown` still accepted (deprecated); will be removed in a future version. See [Modify Links](#modify-links) for more information. |
315
- | `assets.export_meta` | `bool` | `false` | Optional (default: `false`), export of metadata about the page in a json file. |
314
+ | `assets.export_meta` | `bool` | `false` | Optional (default: `false`), export metadata about each archived page, book, or chapter in a json file. |
316
315
  | `http_config` | `object` | `false` | Optional section to override default http configuration. |
317
316
  | `http_config.verify_ssl` | `bool` | `false` | Optional (default: `false`), whether or not to verify ssl certificates if using https. |
318
317
  | `http_config.timeout` | `int` | `false` | Optional (default: `30`), set the timeout, in seconds, for http requests. |
@@ -337,6 +336,26 @@ General
337
336
  - `MINIO_ACCESS_KEY`
338
337
  - `MINIO_SECRET_KEY`
339
338
 
339
+ ## Export Level
340
+
341
+ The `export_level` configuration option controls the granularity of exports:
342
+
343
+ | Value | Description |
344
+ | ----- | ----------- |
345
+ | `pages` (default) | One file per page. Supports `assets.export_images`, `assets.export_attachments`, and `assets.modify_links`. |
346
+ | `books` | One combined file per book per format, written to a per-book folder (`<shelf>/<book>/<book>.<ext>`). Set `assets.modify_links: true` (with `export_images`/`export_attachments`) to download images/attachments locally and rewrite links to relative paths in `markdown` and `html`. `pdf` stays self-contained (assets embedded by Bookstack server-side). |
347
+ | `chapters` | One combined file per chapter per format, in a per-chapter folder (`<shelf>/<book>/<chapter>/<chapter>.<ext>`). Same `modify_links` support (markdown + html) as `books`. **Note:** pages not under any chapter are not captured at this level. |
348
+
349
+ **Example:** `formats: [pdf]` + `export_level: books` exports one PDF per book through the server-side BookStack API export.
350
+
351
+ **Empty nodes:** At `books` and `chapters` levels, a book or chapter with no child content is skipped — no file is written and the omission is logged at `INFO`. This keeps the archive free of empty placeholder documents.
352
+
353
+ The shelf/book/chapter hierarchy is preserved as directories inside the archive regardless of level — e.g. `books` produces `<shelf>/<book>/<book>.pdf` and `chapters` produces `<shelf>/<book>/<chapter>/<chapter>.pdf` (books without a shelf go under the unassigned directory).
354
+
355
+ `assets.export_meta` applies at all levels: when enabled, a `_meta.json` file is written alongside each exported node.
356
+
357
+ For non-default levels the archive filename is suffixed with the level (e.g. `bkps_books_<timestamp>.tgz`, `bkps_chapters_<timestamp>.tgz`); `pages` keeps the unsuffixed `bkps_<timestamp>.tgz`. Because `keep_last` cleanup matches on this prefix, archive retention is scoped independently per level.
358
+
340
359
  ## Backup Behavior
341
360
 
342
361
  ### General
@@ -477,7 +496,7 @@ If an API call to get an attachment or its metadata fails, the exporter will ski
477
496
 
478
497
  The configuration item, `assets.modify_links`, can be set to `true` to rewrite image and attachment URL links in exported files to local relative paths. This feature makes your `markdown` and `html` exports fully portable — assets resolve locally without a network connection to the Bookstack instance.
479
498
 
480
- - **Eligible formats**: `markdown` and `html` only. PDF, plaintext, and zip exports are not rewritten.
499
+ - **Eligible formats**: `markdown` and `html` only. Link rewriting for PDF, plaintext, and zip exports has not been requested or implemented yet.
481
500
  - **Scope**: rewrites image `src` attributes and their outer anchor `href` wrappers; rewrites attachment `<a href>` links. Does **not** rewrite inter-page, inter-book, inter-chapter, or inter-shelf links (deferred to a future issue).
482
501
  - **Legacy alias**: the old key `modify_markdown` will be removed in a future version. Rename to `modify_links` in your configuration.
483
502
 
@@ -495,20 +514,31 @@ Page (parent) -> Images (children) relationships are created and then each image
495
514
 
496
515
  #### HTML example
497
516
 
498
- Bookstack HTML exports wrap images in an anchor tag (click-to-zoom). Both the `<img src>` and the outer `<a href>` are rewritten to the same local file.
517
+ Bookstack HTML exports wrap images in an anchor tag (click-to-zoom). Both the
518
+ `<img src>` and the outer `<a href>` are rewritten to the same local file.
519
+ Images appear in one of two forms; both are localized:
499
520
 
500
521
  ```html
501
- <!-- before: anchor-wrapped image -->
522
+ <!-- before: remote "scaled" thumbnail src (older bookstack installations) -->
523
+ <a href="https://demo.bookstack/uploads/images/gallery/2023-07/pool-topology-1.png">
524
+ <img src="https://demo.bookstack/uploads/images/gallery/2023-07/scaled-1680-/pool-topology-1.png">
525
+ </a>
526
+
527
+ <!-- before: inline base64 src (recent bookstack installations) -->
502
528
  <a href="https://demo.bookstack/uploads/images/gallery/2023-07/pool-topology-1.png">
503
529
  <img src="data:image/png;base64,...">
504
530
  </a>
505
531
 
506
- <!-- after -->
532
+ <!-- after (both forms): src and href point at the one local file -->
507
533
  <a href="images/{page_name}/pool-topology-1.png">
508
- <img src="data:image/png;base64,...">
534
+ <img src="images/{page_name}/pool-topology-1.png">
509
535
  </a>
510
536
  ```
511
537
 
538
+ Inline base64 images are de-inlined to the local file (shrinking the export by
539
+ up to ~700 KB per full-size image). A base64 image **not** wrapped in a
540
+ downloadable anchor is left inline (it still resolves offline).
541
+
512
542
  Attachment links are rewritten from the live URL to a local relative path.
513
543
 
514
544
  ```html
@@ -523,7 +553,7 @@ Attachment links are rewritten from the live URL to a local relative path.
523
553
 
524
554
  Markdown exports use raw `bytes.replace` — no structural awareness (e.g. DOM). If an attachment URL for some reason appears verbatim anywhere in the markdown source (code block, pre, comment, plain text), it gets replaced.
525
555
 
526
- HTML exports are safe because bs4 filters to only <img src> / <a href> attributes before replacing.
556
+ HTML exports are safe because bs4 filters to only `<img src> / <a href>` attributes before replacing.
527
557
 
528
558
  ## Object Storage
529
559
  Optionally, target(s) can be specified to upload generated archives to a remote location. Supported object storage providers can be found below:
@@ -614,24 +644,30 @@ Below are versions that have major changes to the way configuration or exporter
614
644
 
615
645
  ## Running Tests
616
646
 
617
- Install dev dependencies and run the test suite:
647
+ This project uses [uv](https://docs.astral.sh/uv/) for development. Sync dev dependencies and run the test suite:
648
+
649
+ ```bash
650
+ uv sync --all-groups
651
+ uv run pytest
652
+ ```
653
+
654
+ Or via the [Taskfile](https://taskfile.dev) target:
618
655
 
619
656
  ```bash
620
- pip install -e ".[dev]"
621
- pytest
657
+ task test
622
658
  ```
623
659
 
624
660
  The pytest run includes coverage by default (configured in `pyproject.toml`). For an HTML coverage report:
625
661
 
626
662
  ```bash
627
- pytest --cov-report=html
663
+ uv run pytest --cov-report=html
628
664
  open htmlcov/index.html
629
665
  ```
630
666
 
631
667
  To run only unit tests (skipping integration tests):
632
668
 
633
669
  ```bash
634
- pytest tests/unit
670
+ uv run pytest tests/unit
635
671
  ```
636
672
 
637
673
  To run only the integration tests:
@@ -1,30 +1,3 @@
1
- Metadata-Version: 2.4
2
- Name: bookstack-file-exporter
3
- Version: 2.0.0
4
- Summary: An exporter written in python to export all documents from a bookstack instance in different formats
5
- Home-page: https://github.com/homeylab/bookstack-file-exporter
6
- Author: pchang388
7
- License: MIT License
8
- Keywords: bookstack,exporter
9
- Classifier: License :: OSI Approved :: MIT License
10
- Classifier: Programming Language :: Python :: 3
11
- Requires-Python: >=3.8
12
- Description-Content-Type: text/markdown
13
- License-File: LICENSE
14
- Requires-Dist: Pyyaml>=6.0.2
15
- Requires-Dist: Pydantic>=2.13.4
16
- Requires-Dist: beautifulsoup4>=4.14.3
17
- Requires-Dist: requests>=2.32.3
18
- Requires-Dist: minio>=7.2.15
19
- Requires-Dist: apprise>=1.9.4
20
- Requires-Dist: markdown-it-py>=3.0.0
21
- Provides-Extra: dev
22
- Requires-Dist: pytest>=8.0; extra == "dev"
23
- Requires-Dist: pytest-cov>=5.0; extra == "dev"
24
- Requires-Dist: responses>=0.25; extra == "dev"
25
- Requires-Dist: pylint; extra == "dev"
26
- Dynamic: license-file
27
-
28
1
  # bookstack-file-exporter
29
2
  Table of Contents
30
3
  - [bookstack-file-exporter](#bookstack-file-exporter)
@@ -53,7 +26,7 @@ Table of Contents
53
26
  ## Background
54
27
  _If you encounter any issues, want to request an additional feature, or provide assistance, feel free to open a Github issue._
55
28
 
56
- This tool provides a way to export [Bookstack](https://github.com/BookStackApp/BookStack) pages and their content (_text, images, attachments, metadata, etc._) into a relational parent-child layout locally with an option to push to remote object storage locations. See [Backup Behavior](#backup-behavior) section for more details on how pages are organized. Image and attachment links can also be modified in markdown exports to point to local exported paths.
29
+ This tool provides a way to export [Bookstack](https://github.com/BookStackApp/BookStack) pages and their content (_text, images, attachments, metadata, etc._) into a relational parent-child layout locally with an option to push to remote object storage locations. See [Backup Behavior](#backup-behavior) section for more details on how pages are organized. Image and attachment links can also be modified in markdown and html exports to point to local exported paths.
57
30
 
58
31
  This small project was mainly created to run as a cron job in k8s but works anywhere. This tool allows me to export my docs in markdown, or other formats like pdf. I use Bookstack's markdown editor as default instead of WYSIWYG editor and this makes my notes portable anywhere even if offline.
59
32
 
@@ -118,15 +91,18 @@ assets:
118
91
  ```
119
92
 
120
93
  ### Run via Pip
121
- The exporter can be installed via pip and run directly.
94
+ The exporter can be installed via pip (or [uv](https://docs.astral.sh/uv/)) and run directly.
122
95
 
123
96
  #### Python Version
124
- _Note: This application is tested and developed on Python version `3.13.2`. The min required version is >= `3.8` but is recommended to install (or set up a venv) a `3.13.2` version._
97
+ _Note: This application is tested and developed on Python version `3.14.5`. The minimum required version is `3.11`, but it is recommended to install (or set up a venv with) version `3.14.5`._
125
98
 
126
99
  #### Examples
127
100
  ```bash
128
101
  python -m pip install bookstack-file-exporter
129
102
 
103
+ # or with uv:
104
+ uv pip install bookstack-file-exporter
105
+
130
106
  # if you prefer a specific version, example:
131
107
  python -m pip install bookstack-file-exporter==X.X.X
132
108
 
@@ -306,13 +282,14 @@ More descriptions can be found for each section below:
306
282
  | `credentials` | `object` | `false` | Optional section where Bookstack tokenId and tokenSecret can be specified. Env variable for credentials may be supplied instead. See [Authentication](#authentication) for more details. |
307
283
  | `credentials.token_id` | `str`| `false` if specified through env var instead, otherwise `true` | A valid Bookstack tokenId. |
308
284
  | `credentials.token_secret` | `str` | `false` if specified through env var instead, otherwise `true` | A valid Bookstack tokenSecret. |
309
- | `formats` | `list<str>` | `true` | Which export formats to use for Bookstack page content. Valid options are: `["markdown", "html", "pdf", "plaintext", "zip"]`|
285
+ | `formats` | `list<str>` | `true` | Which export formats to use for BookStack content. Valid options are: `["markdown", "html", "pdf", "plaintext", "zip"]`|
286
+ | `export_level` | `str` | `false` | Optional (default: `pages`). Export granularity. See [Export Level](#export-level) for details. Valid options: `pages`, `books`, `chapters`. |
310
287
  | `output_path` | `str` | `false` | Optional (default: `cwd`) which directory (relative or full path) to place exports. User who runs the command should have access to read/write to this directory. This directory and any parent directories will be attempted to be created if they do not exist. If not provided, will use current run directory by default. If using docker, this option can be omitted. |
311
288
  | `assets` | `object` | `false` | Optional section to export additional assets from pages. |
312
289
  | `assets.export_images` | `bool` | `false` | Optional (default: `false`), export all images for a page to an `image` directory within page directory. See [Backup Behavior](#backup-behavior) for more information on layout |
313
290
  | `assets.export_attachments` | `bool` | `false` | Optional (default: `false`), export all attachments for a page to an `attachments` directory within page directory. See [Backup Behavior](#backup-behavior) for more information on layout |
314
291
  | `assets.modify_links` | `bool` | `false` | Optional (default: `false`). Rewrites image and attachment URLs in markdown AND html exports to local relative paths. Requires `assets.export_images` and/or `assets.export_attachments` to be `true`. Only applies to `markdown` and `html` formats; pdf, plaintext, and zip are not eligible. Legacy key `modify_markdown` still accepted (deprecated); will be removed in a future version. See [Modify Links](#modify-links) for more information. |
315
- | `assets.export_meta` | `bool` | `false` | Optional (default: `false`), export of metadata about the page in a json file. |
292
+ | `assets.export_meta` | `bool` | `false` | Optional (default: `false`), export metadata about each archived page, book, or chapter in a json file. |
316
293
  | `http_config` | `object` | `false` | Optional section to override default http configuration. |
317
294
  | `http_config.verify_ssl` | `bool` | `false` | Optional (default: `false`), whether or not to verify ssl certificates if using https. |
318
295
  | `http_config.timeout` | `int` | `false` | Optional (default: `30`), set the timeout, in seconds, for http requests. |
@@ -337,6 +314,26 @@ General
337
314
  - `MINIO_ACCESS_KEY`
338
315
  - `MINIO_SECRET_KEY`
339
316
 
317
+ ## Export Level
318
+
319
+ The `export_level` configuration option controls the granularity of exports:
320
+
321
+ | Value | Description |
322
+ | ----- | ----------- |
323
+ | `pages` (default) | One file per page. Supports `assets.export_images`, `assets.export_attachments`, and `assets.modify_links`. |
324
+ | `books` | One combined file per book per format, written to a per-book folder (`<shelf>/<book>/<book>.<ext>`). Set `assets.modify_links: true` (with `export_images`/`export_attachments`) to download images/attachments locally and rewrite links to relative paths in `markdown` and `html`. `pdf` stays self-contained (assets embedded by Bookstack server-side). |
325
+ | `chapters` | One combined file per chapter per format, in a per-chapter folder (`<shelf>/<book>/<chapter>/<chapter>.<ext>`). Same `modify_links` support (markdown + html) as `books`. **Note:** pages not under any chapter are not captured at this level. |
326
+
327
+ **Example:** `formats: [pdf]` + `export_level: books` exports one PDF per book through the server-side BookStack API export.
328
+
329
+ **Empty nodes:** At `books` and `chapters` levels, a book or chapter with no child content is skipped — no file is written and the omission is logged at `INFO`. This keeps the archive free of empty placeholder documents.
330
+
331
+ The shelf/book/chapter hierarchy is preserved as directories inside the archive regardless of level — e.g. `books` produces `<shelf>/<book>/<book>.pdf` and `chapters` produces `<shelf>/<book>/<chapter>/<chapter>.pdf` (books without a shelf go under the unassigned directory).
332
+
333
+ `assets.export_meta` applies at all levels: when enabled, a `_meta.json` file is written alongside each exported node.
334
+
335
+ For non-default levels the archive filename is suffixed with the level (e.g. `bkps_books_<timestamp>.tgz`, `bkps_chapters_<timestamp>.tgz`); `pages` keeps the unsuffixed `bkps_<timestamp>.tgz`. Because `keep_last` cleanup matches on this prefix, archive retention is scoped independently per level.
336
+
340
337
  ## Backup Behavior
341
338
 
342
339
  ### General
@@ -477,7 +474,7 @@ If an API call to get an attachment or its metadata fails, the exporter will ski
477
474
 
478
475
  The configuration item, `assets.modify_links`, can be set to `true` to rewrite image and attachment URL links in exported files to local relative paths. This feature makes your `markdown` and `html` exports fully portable — assets resolve locally without a network connection to the Bookstack instance.
479
476
 
480
- - **Eligible formats**: `markdown` and `html` only. PDF, plaintext, and zip exports are not rewritten.
477
+ - **Eligible formats**: `markdown` and `html` only. Link rewriting for PDF, plaintext, and zip exports has not been requested or implemented yet.
481
478
  - **Scope**: rewrites image `src` attributes and their outer anchor `href` wrappers; rewrites attachment `<a href>` links. Does **not** rewrite inter-page, inter-book, inter-chapter, or inter-shelf links (deferred to a future issue).
482
479
  - **Legacy alias**: the old key `modify_markdown` will be removed in a future version. Rename to `modify_links` in your configuration.
483
480
 
@@ -495,20 +492,31 @@ Page (parent) -> Images (children) relationships are created and then each image
495
492
 
496
493
  #### HTML example
497
494
 
498
- Bookstack HTML exports wrap images in an anchor tag (click-to-zoom). Both the `<img src>` and the outer `<a href>` are rewritten to the same local file.
495
+ Bookstack HTML exports wrap images in an anchor tag (click-to-zoom). Both the
496
+ `<img src>` and the outer `<a href>` are rewritten to the same local file.
497
+ Images appear in one of two forms; both are localized:
499
498
 
500
499
  ```html
501
- <!-- before: anchor-wrapped image -->
500
+ <!-- before: remote "scaled" thumbnail src (older bookstack installations) -->
501
+ <a href="https://demo.bookstack/uploads/images/gallery/2023-07/pool-topology-1.png">
502
+ <img src="https://demo.bookstack/uploads/images/gallery/2023-07/scaled-1680-/pool-topology-1.png">
503
+ </a>
504
+
505
+ <!-- before: inline base64 src (recent bookstack installations) -->
502
506
  <a href="https://demo.bookstack/uploads/images/gallery/2023-07/pool-topology-1.png">
503
507
  <img src="data:image/png;base64,...">
504
508
  </a>
505
509
 
506
- <!-- after -->
510
+ <!-- after (both forms): src and href point at the one local file -->
507
511
  <a href="images/{page_name}/pool-topology-1.png">
508
- <img src="data:image/png;base64,...">
512
+ <img src="images/{page_name}/pool-topology-1.png">
509
513
  </a>
510
514
  ```
511
515
 
516
+ Inline base64 images are de-inlined to the local file (shrinking the export by
517
+ up to ~700 KB per full-size image). A base64 image **not** wrapped in a
518
+ downloadable anchor is left inline (it still resolves offline).
519
+
512
520
  Attachment links are rewritten from the live URL to a local relative path.
513
521
 
514
522
  ```html
@@ -523,7 +531,7 @@ Attachment links are rewritten from the live URL to a local relative path.
523
531
 
524
532
  Markdown exports use raw `bytes.replace`, which has no structural awareness (e.g. no DOM parsing). If an attachment URL appears verbatim anywhere in the markdown source (code block, pre, comment, plain text), it gets replaced.
525
533
 
526
- HTML exports are safe because bs4 filters to only <img src> / <a href> attributes before replacing.
534
+ HTML exports are safe because bs4 filters to only `<img src>` / `<a href>` attributes before replacing.
527
535
 
528
536
  ## Object Storage
529
537
  Optionally, target(s) can be specified to upload generated archives to a remote location. Supported object storage providers can be found below:
@@ -614,24 +622,30 @@ Below are versions that have major changes to the way configuration or exporter
614
622
 
615
623
  ## Running Tests
616
624
 
617
- Install dev dependencies and run the test suite:
625
+ This project uses [uv](https://docs.astral.sh/uv/) for development. Sync dev dependencies and run the test suite:
626
+
627
+ ```bash
628
+ uv sync --all-groups
629
+ uv run pytest
630
+ ```
631
+
632
+ Or via the [Taskfile](https://taskfile.dev) target:
618
633
 
619
634
  ```bash
620
- pip install -e ".[dev]"
621
- pytest
635
+ task test
622
636
  ```
623
637
 
624
638
  The pytest run includes coverage by default (configured in `pyproject.toml`). For an HTML coverage report:
625
639
 
626
640
  ```bash
627
- pytest --cov-report=html
641
+ uv run pytest --cov-report=html
628
642
  open htmlcov/index.html
629
643
  ```
630
644
 
631
645
  To run only unit tests (skipping integration tests):
632
646
 
633
647
  ```bash
634
- pytest tests/unit
648
+ uv run pytest tests/unit
635
649
  ```
636
650
 
637
651
  To run only the integration tests:
@@ -1,11 +1,15 @@
1
- from typing import List, Dict
2
1
  from datetime import datetime
3
2
  import logging
4
3
  import os
5
4
 
6
5
  from bookstack_file_exporter.exporter.node import Node
7
6
  from bookstack_file_exporter.archiver import util
8
- from bookstack_file_exporter.archiver.page_archiver import PageArchiver
7
+ from bookstack_file_exporter.archiver.node_archiver import (
8
+ NodeArchiver,
9
+ BookArchiver,
10
+ ChapterArchiver,
11
+ PageArchiver,
12
+ )
9
13
  from bookstack_file_exporter.archiver.minio_archiver import MinioArchiver
10
14
  from bookstack_file_exporter.config_helper.remote import StorageProviderConfig
11
15
  from bookstack_file_exporter.config_helper.config_helper import ConfigNode
@@ -18,7 +22,7 @@ _DATE_STR_FORMAT = "%Y-%m-%d_%H-%M-%S"
18
22
  # pylint: disable=too-many-instance-attributes
19
23
  class Archiver:
20
24
  """
21
- Archiver helps handle archive duties: pulls all the necessary files from upstream
25
+ Archiver helps handle archive duties: pulls all the necessary files from upstream
22
26
  and then pushes them to the specified backup location(s)
23
27
 
24
28
  Args:
@@ -26,15 +30,41 @@ class Archiver:
26
30
  :http_client: <HttpHelper> = http helper functions with config from user inputs
27
31
 
28
32
  Returns:
29
- Archiver instance with attributes that are accessible
33
+ Archiver instance with attributes that are accessible
30
34
  for use for handling bookstack exports and remote uploads.
31
35
  """
32
36
  def __init__(self, config: ConfigNode, http_client: HttpHelper):
33
37
  self.config = config
34
38
  # for convenience
35
- self.base_dir = config.base_dir_name
39
+ self.base_dir = self._level_base_dir(config.base_dir_name,
40
+ config.user_inputs.export_level)
36
41
  self.archive_dir = self._generate_root_folder(self.base_dir)
37
- self._page_archiver = PageArchiver(self.archive_dir, self.config, http_client)
42
+ self._archiver: NodeArchiver = self._build_archiver(http_client)
43
+
44
+ def _build_archiver(self, http_client: HttpHelper) -> NodeArchiver:
45
+ """Return the appropriate archiver based on the configured export level."""
46
+ export_level = self.config.user_inputs.export_level
47
+ export_meta: bool = self.config.user_inputs.assets.export_meta
48
+ if export_level == "books":
49
+ return BookArchiver(
50
+ archive_dir=self.archive_dir,
51
+ api_urls=self.config.urls,
52
+ export_formats=self.config.user_inputs.formats,
53
+ http_client=http_client,
54
+ export_meta=export_meta,
55
+ asset_config=self.config.user_inputs.assets,
56
+ )
57
+ if export_level == "chapters":
58
+ return ChapterArchiver(
59
+ archive_dir=self.archive_dir,
60
+ api_urls=self.config.urls,
61
+ export_formats=self.config.user_inputs.formats,
62
+ http_client=http_client,
63
+ export_meta=export_meta,
64
+ asset_config=self.config.user_inputs.assets,
65
+ )
66
+ # default: "pages"
67
+ return PageArchiver(self.archive_dir, self.config, http_client)
38
68
 
39
69
  def create_export_dir(self):
40
70
  """create directory for archiving"""
@@ -52,17 +82,23 @@ class Archiver:
52
82
  "attempting to skip this step")
53
83
  return
54
84
 
55
- def get_bookstack_exports(self, page_nodes: Dict[int, Node]):
56
- """export all page content"""
57
- log.info("Exporting all bookstack page contents")
58
- # get images first if requested
59
- # this is because we may want to manipulate page data with modify_links flag
60
- self._page_archiver.archive_pages(page_nodes)
85
+ def get_bookstack_exports(self, nodes: dict[int, Node]):
86
+ """export all node content (polymorphic: pages, books, or chapters)"""
87
+ log.info("Exporting all bookstack contents")
88
+ self._archiver.archive(nodes)
89
+
90
+ @property
91
+ def has_exported_content(self) -> bool:
92
+ """True if the intermediate tar exists, i.e. at least one file was written.
93
+
94
+ Checked against the tar on disk (ground truth) rather than a flag threaded
95
+ up from the archivers, so it cannot drift from what was actually archived.
96
+ """
97
+ return os.path.exists(self._archiver.tar_file)
61
98
 
62
99
  def create_archive(self):
63
100
  """create tgz archive"""
64
- # check if tar needs to be created first
65
- self._page_archiver.gzip_archive()
101
+ self._archiver.gzip_archive()
66
102
 
67
103
  # send to remote systems
68
104
  def archive_remote(self):
@@ -84,8 +120,8 @@ class Archiver:
84
120
  def _archive_minio(self, obj_config: StorageProviderConfig):
85
121
  minio_archiver = MinioArchiver(obj_config.access_key,
86
122
  obj_config.secret_key, obj_config.config)
87
- minio_archiver.upload_backup(self._page_archiver.archive_file)
88
- minio_archiver.clean_up(self._page_archiver.file_extension_map['tgz'])
123
+ minio_archiver.upload_backup(self._archiver.archive_file)
124
+ minio_archiver.clean_up(self._archiver.file_extension_map['tgz'])
89
125
 
90
126
  def _archive_s3(self, obj_config: StorageProviderConfig):
91
127
  raise NotImplementedError("S3 remote storage is not yet implemented")
@@ -99,11 +135,11 @@ class Archiver:
99
135
  if to_delete:
100
136
  self._delete_files(to_delete)
101
137
 
102
- def _get_stale_archives(self) -> List[str]:
138
+ def _get_stale_archives(self) -> list[str]:
103
139
  # if user is uploading to object storage
104
140
  # delete the local .tgz archive since we have it there already
105
- archive_list: List[str] = util.scan_archives(self.base_dir,
106
- self._page_archiver.file_extension_map['tgz'])
141
+ archive_list: list[str] = util.scan_archives(self.base_dir,
142
+ self._archiver.file_extension_map['tgz'])
107
143
  if not archive_list:
108
144
  log.debug("No archive files found to clean up")
109
145
  return []
@@ -120,7 +156,7 @@ class Archiver:
120
156
  to_delete = self._filter_archives(archive_list)
121
157
  return to_delete
122
158
 
123
- def _filter_archives(self, file_list: List[str]) -> List[str]:
159
+ def _filter_archives(self, file_list: list[str]) -> list[str]:
124
160
  """get older archives based on keep number"""
125
161
  file_dict = {file: os.stat(file).st_ctime for file in file_list}
126
162
  ordered = sorted(file_dict.items(), key=lambda item: item[1])
@@ -134,10 +170,22 @@ class Archiver:
134
170
  log.debug("%d local archives will be cleaned up", len(files_to_clean))
135
171
  return files_to_clean
136
172
 
137
- def _delete_files(self, file_list: List[str]):
173
+ def _delete_files(self, file_list: list[str]):
138
174
  for file in file_list:
139
175
  util.remove_file(file)
140
176
 
177
+ @staticmethod
178
+ def _level_base_dir(base_dir: str, export_level: str) -> str:
179
+ """Append the export level to the archive base name for non-default levels.
180
+
181
+ `pages` (the default) stays byte-identical to prior behavior; `books` and
182
+ `chapters` get a distinguishable name (e.g. `bkps_books`). Because keep_last
183
+ cleanup globs on this base, retention is naturally scoped per level.
184
+ """
185
+ if export_level == "pages":
186
+ return base_dir
187
+ return f"{base_dir}_{export_level}"
188
+
141
189
  @staticmethod
142
190
  def _generate_root_folder(base_folder_name: str) -> str:
143
191
  """return base archive name"""