confluence-space-exporter 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- confluence_space_exporter-0.1.0/.gitignore +55 -0
- confluence_space_exporter-0.1.0/CHANGELOG.md +21 -0
- confluence_space_exporter-0.1.0/LICENSE +21 -0
- confluence_space_exporter-0.1.0/PKG-INFO +402 -0
- confluence_space_exporter-0.1.0/README.md +331 -0
- confluence_space_exporter-0.1.0/examples/config.example.json +44 -0
- confluence_space_exporter-0.1.0/examples/use_as_library.py +125 -0
- confluence_space_exporter-0.1.0/pyproject.toml +104 -0
- confluence_space_exporter-0.1.0/src/confluence_exporter/__init__.py +28 -0
- confluence_space_exporter-0.1.0/src/confluence_exporter/__main__.py +6 -0
- confluence_space_exporter-0.1.0/src/confluence_exporter/auth.py +211 -0
- confluence_space_exporter-0.1.0/src/confluence_exporter/cli.py +710 -0
- confluence_space_exporter-0.1.0/src/confluence_exporter/client.py +235 -0
- confluence_space_exporter-0.1.0/src/confluence_exporter/config.py +164 -0
- confluence_space_exporter-0.1.0/src/confluence_exporter/converter.py +332 -0
- confluence_space_exporter-0.1.0/src/confluence_exporter/exporter.py +439 -0
- confluence_space_exporter-0.1.0/src/confluence_exporter/filename.py +66 -0
- confluence_space_exporter-0.1.0/src/confluence_exporter/formatters.py +238 -0
- confluence_space_exporter-0.1.0/src/confluence_exporter/gui.py +981 -0
- confluence_space_exporter-0.1.0/src/confluence_exporter/html_cleaner.py +188 -0
- confluence_space_exporter-0.1.0/src/confluence_exporter/lockfile.py +40 -0
- confluence_space_exporter-0.1.0/src/confluence_exporter/logging_utils.py +35 -0
- confluence_space_exporter-0.1.0/src/confluence_exporter/merger.py +348 -0
- confluence_space_exporter-0.1.0/src/confluence_exporter/paths.py +102 -0
- confluence_space_exporter-0.1.0/src/confluence_exporter/pdf_engines.py +327 -0
- confluence_space_exporter-0.1.0/src/confluence_exporter/ui.py +163 -0
- confluence_space_exporter-0.1.0/tests/__init__.py +0 -0
- confluence_space_exporter-0.1.0/tests/test_auth.py +77 -0
- confluence_space_exporter-0.1.0/tests/test_client_urls.py +69 -0
- confluence_space_exporter-0.1.0/tests/test_config.py +56 -0
- confluence_space_exporter-0.1.0/tests/test_converter_init.py +64 -0
- confluence_space_exporter-0.1.0/tests/test_exporter_diff.py +243 -0
- confluence_space_exporter-0.1.0/tests/test_filename.py +29 -0
- confluence_space_exporter-0.1.0/tests/test_html_cleaner.py +104 -0
- confluence_space_exporter-0.1.0/tests/test_paths.py +45 -0
|
@@ -0,0 +1,55 @@
|
|
|
1
|
+
# --- Python ---
|
|
2
|
+
__pycache__/
|
|
3
|
+
*.py[cod]
|
|
4
|
+
*$py.class
|
|
5
|
+
*.so
|
|
6
|
+
.Python
|
|
7
|
+
*.egg-info/
|
|
8
|
+
*.egg
|
|
9
|
+
build/
|
|
10
|
+
dist/
|
|
11
|
+
wheels/
|
|
12
|
+
pip-wheel-metadata/
|
|
13
|
+
.tox/
|
|
14
|
+
.nox/
|
|
15
|
+
.coverage
|
|
16
|
+
.coverage.*
|
|
17
|
+
coverage.xml
|
|
18
|
+
htmlcov/
|
|
19
|
+
.pytest_cache/
|
|
20
|
+
.mypy_cache/
|
|
21
|
+
.ruff_cache/
|
|
22
|
+
|
|
23
|
+
# --- Virtual envs ---
|
|
24
|
+
.venv/
|
|
25
|
+
venv/
|
|
26
|
+
env/
|
|
27
|
+
ENV/
|
|
28
|
+
|
|
29
|
+
# --- IDE ---
|
|
30
|
+
.idea/
|
|
31
|
+
.vscode/
|
|
32
|
+
*.swp
|
|
33
|
+
*.swo
|
|
34
|
+
|
|
35
|
+
# --- OS ---
|
|
36
|
+
.DS_Store
|
|
37
|
+
Thumbs.db
|
|
38
|
+
desktop.ini
|
|
39
|
+
|
|
40
|
+
# --- Project output / secrets ---
|
|
41
|
+
# NEVER commit user secrets or downloaded data
|
|
42
|
+
config.json
|
|
43
|
+
config.local.json
|
|
44
|
+
*.local.json
|
|
45
|
+
output/
|
|
46
|
+
output_*/
|
|
47
|
+
*_converted/
|
|
48
|
+
*_volumes/
|
|
49
|
+
confluence-lock.json
|
|
50
|
+
*.pdf
|
|
51
|
+
*.docx
|
|
52
|
+
|
|
53
|
+
# Allow examples and docs PDFs if needed:
|
|
54
|
+
!examples/**/*.pdf
|
|
55
|
+
!docs/**/*.pdf
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
# Changelog
|
|
2
|
+
|
|
3
|
+
All notable changes to this project will be documented in this file.
|
|
4
|
+
|
|
5
|
+
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/),
|
|
6
|
+
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
|
|
7
|
+
|
|
8
|
+
## [0.1.0] - 2026-06-22
|
|
9
|
+
|
|
10
|
+
### Added
|
|
11
|
+
- Interactive Rich-powered TUI with banners, colored prompts, progress bars, and summary tables.
|
|
12
|
+
- Three subcommands: `export`, `convert`, `merge` — each also reachable from an interactive menu.
|
|
13
|
+
- **Export**: download an entire Confluence Cloud space (pages + attachments + Gliffy diagrams) as HTML, Markdown, DOCX, or PDF.
|
|
14
|
+
- **Convert**: turn a tree of exported HTML files into clean PDFs/DOCX, with inline attachment embedding and automatic PDF-attachment merging.
|
|
15
|
+
- **Merge**: consolidate many per-page PDFs into volumes with a generated Table of Contents and a hierarchical PDF bookmark outline — NotebookLM-ready.
|
|
16
|
+
- Three authentication strategies: API token (Basic), Personal Access Token (Bearer), and **generic browser-cookie paste** that accepts any session cookie name (`cloud.session.token`, `tenant.session.token`, or a whole `Cookie:` header copied from DevTools).
|
|
17
|
+
- Pluggable PDF engine system with automatic fallback: Playwright → WeasyPrint → xhtml2pdf.
|
|
18
|
+
- Windows long-path (`\\?\`) handling for all file I/O.
|
|
19
|
+
- Per-space lockfile for resumable, incremental exports.
|
|
20
|
+
|
|
21
|
+
[0.1.0]: https://github.com/LeoChi/confluence-exporter/releases/tag/v0.1.0
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 Leano Chiodo
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
|
@@ -0,0 +1,402 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: confluence-space-exporter
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Export a Confluence space to PDF/DOCX/Markdown/HTML with attachments, merging, and an interactive UI.
|
|
5
|
+
Project-URL: Homepage, https://github.com/LeoChi/confluence-exporter
|
|
6
|
+
Project-URL: Repository, https://github.com/LeoChi/confluence-exporter
|
|
7
|
+
Project-URL: Issues, https://github.com/LeoChi/confluence-exporter/issues
|
|
8
|
+
Project-URL: Changelog, https://github.com/LeoChi/confluence-exporter/blob/main/CHANGELOG.md
|
|
9
|
+
Author: Leano Chiodo
|
|
10
|
+
License: MIT License
|
|
11
|
+
|
|
12
|
+
Copyright (c) 2026 Leano Chiodo
|
|
13
|
+
|
|
14
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
15
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
16
|
+
in the Software without restriction, including without limitation the rights
|
|
17
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
18
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
19
|
+
furnished to do so, subject to the following conditions:
|
|
20
|
+
|
|
21
|
+
The above copyright notice and this permission notice shall be included in all
|
|
22
|
+
copies or substantial portions of the Software.
|
|
23
|
+
|
|
24
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
25
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
26
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
27
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
28
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
29
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
30
|
+
SOFTWARE.
|
|
31
|
+
License-File: LICENSE
|
|
32
|
+
Keywords: atlassian,confluence,documentation,exporter,notebooklm,pdf
|
|
33
|
+
Classifier: Development Status :: 4 - Beta
|
|
34
|
+
Classifier: Environment :: Console
|
|
35
|
+
Classifier: Intended Audience :: End Users/Desktop
|
|
36
|
+
Classifier: Intended Audience :: Information Technology
|
|
37
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
38
|
+
Classifier: Operating System :: OS Independent
|
|
39
|
+
Classifier: Programming Language :: Python :: 3
|
|
40
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
41
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
42
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
43
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
44
|
+
Classifier: Topic :: Documentation
|
|
45
|
+
Classifier: Topic :: Utilities
|
|
46
|
+
Requires-Python: >=3.10
|
|
47
|
+
Requires-Dist: beautifulsoup4>=4.12
|
|
48
|
+
Requires-Dist: lxml>=4.9
|
|
49
|
+
Requires-Dist: markdownify>=0.11
|
|
50
|
+
Requires-Dist: pillow>=10.0
|
|
51
|
+
Requires-Dist: pypdf>=4.0
|
|
52
|
+
Requires-Dist: python-docx>=1.1
|
|
53
|
+
Requires-Dist: requests>=2.31
|
|
54
|
+
Requires-Dist: rich>=13.7
|
|
55
|
+
Requires-Dist: typer>=0.12
|
|
56
|
+
Requires-Dist: xhtml2pdf>=0.2.11
|
|
57
|
+
Provides-Extra: all
|
|
58
|
+
Requires-Dist: playwright>=1.40; extra == 'all'
|
|
59
|
+
Requires-Dist: weasyprint>=60.0; extra == 'all'
|
|
60
|
+
Provides-Extra: dev
|
|
61
|
+
Requires-Dist: build>=1.2; extra == 'dev'
|
|
62
|
+
Requires-Dist: pytest-cov>=5.0; extra == 'dev'
|
|
63
|
+
Requires-Dist: pytest>=8.0; extra == 'dev'
|
|
64
|
+
Requires-Dist: ruff>=0.6; extra == 'dev'
|
|
65
|
+
Requires-Dist: twine>=5.0; extra == 'dev'
|
|
66
|
+
Provides-Extra: playwright
|
|
67
|
+
Requires-Dist: playwright>=1.40; extra == 'playwright'
|
|
68
|
+
Provides-Extra: weasyprint
|
|
69
|
+
Requires-Dist: weasyprint>=60.0; extra == 'weasyprint'
|
|
70
|
+
Description-Content-Type: text/markdown
|
|
71
|
+
|
|
72
|
+
<!-- markdownlint-disable MD033 MD041 -->
|
|
73
|
+
<div align="center">
|
|
74
|
+
|
|
75
|
+
# Confluence Exporter
|
|
76
|
+
|
|
77
|
+
**Export an entire Confluence space to PDF, DOCX, Markdown or HTML — with attachments embedded, optional consolidated volumes, and an interactive terminal UI.**
|
|
78
|
+
|
|
79
|
+
[![PyPI version](https://img.shields.io/pypi/v/confluence-space-exporter.svg)](https://pypi.org/project/confluence-space-exporter/)
|
|
80
|
+
[![Python versions](https://img.shields.io/pypi/pyversions/confluence-space-exporter.svg)](https://pypi.org/project/confluence-space-exporter/)
|
|
81
|
+
[![License: MIT](https://img.shields.io/badge/license-MIT-green.svg)](LICENSE)
|
|
82
|
+
|
|
83
|
+
</div>
|
|
84
|
+
|
|
85
|
+
Built for teams that need to **archive, migrate, or feed their wiki into an LLM** (NotebookLM, RAG pipelines, LLM fine-tuning). The tool produces clean, self-contained PDFs with embedded images and merged attachment appendices — no missing links, no 0 KB files, no orphaned binary blobs.
|
|
86
|
+
|
|
87
|
+
---
|
|
88
|
+
|
|
89
|
+
## Highlights
|
|
90
|
+
|
|
91
|
+
- 🔐 **Three auth strategies**: API token (Basic), Personal Access Token (Bearer), or **browser-cookie paste** — works on SSO-locked tenants where API tokens are disabled. Cookie parser is generic: paste the full `Cookie:` header from DevTools and any session-cookie name (`cloud.session.token`, `tenant.session.token`, …) works.
|
|
92
|
+
- 🖨️ **Pluggable PDF engines** with automatic fallback: **Playwright → WeasyPrint → xhtml2pdf**. Pick one explicitly or let the tool auto-detect the best one you have installed.
|
|
93
|
+
- 📎 **Attachments really embedded**: PDFs get merged as appendix pages into each document; images are inlined; others are listed with links.
|
|
94
|
+
- 📚 **Consolidated volumes**: merge the per-page PDFs of a space into a few big PDFs with a generated **Table of Contents** and a **hierarchical PDF bookmark outline** — upload-ready for NotebookLM.
|
|
95
|
+
- 🪟 **Windows long-path safe**: deep Confluence hierarchies that exceed `MAX_PATH` (260 chars) are handled via `\\?\` prefixing and a `_flat` fallback bucket.
|
|
96
|
+
- ⚡ **Resumable**: a per-space lockfile skips pages that haven't changed, so re-runs take seconds.
|
|
97
|
+
- 🎨 **Nice terminal UI**: colored prompts, progress bars, summary tables (powered by [Rich](https://github.com/Textualize/rich) + [Typer](https://typer.tiangolo.com/)).
|
|
98
|
+
|
|
99
|
+
---
|
|
100
|
+
|
|
101
|
+
## Installation
|
|
102
|
+
|
|
103
|
+
```bash
|
|
104
|
+
pip install confluence-space-exporter
|
|
105
|
+
```
|
|
106
|
+
|
|
107
|
+
For best-quality PDFs (recommended), also install Playwright:
|
|
108
|
+
|
|
109
|
+
```bash
|
|
110
|
+
pip install "confluence-space-exporter[playwright]"
|
|
111
|
+
playwright install chromium
|
|
112
|
+
```
|
|
113
|
+
|
|
114
|
+
All engines in one shot:
|
|
115
|
+
|
|
116
|
+
```bash
|
|
117
|
+
pip install "confluence-space-exporter[all]"
|
|
118
|
+
playwright install chromium
|
|
119
|
+
```
|
|
120
|
+
|
|
121
|
+
Requires Python **3.10+**.
|
|
122
|
+
|
|
123
|
+
---
|
|
124
|
+
|
|
125
|
+
## Three ways to use it
|
|
126
|
+
|
|
127
|
+
The same codebase ships with three entry surfaces — the business logic lives in the library; CLI and GUI are thin adapters on top.
|
|
128
|
+
|
|
129
|
+
| Mode | When to use it | Launch |
|
|
130
|
+
| ------------------ | --------------------------------------------------------------- | -------------------------------------------------- |
|
|
131
|
+
| 🖱️ **Desktop app** | Prefer a window with forms, file pickers and a live log pane | `confluence-exporter-gui` *(alias: `cfx-gui`)* |
|
|
132
|
+
| 💻 **CLI / TUI** | SSH sessions, scripts, CI pipelines, or a rich terminal UI | `confluence-exporter` *(alias: `cfx`)* |
|
|
133
|
+
| 📦 **Library** | Embed the export inside your own Python code / data pipeline | `from confluence_exporter import SpaceExporter, …` |
|
|
134
|
+
|
|
135
|
+
### Desktop app
|
|
136
|
+
|
|
137
|
+
```bash
|
|
138
|
+
confluence-exporter-gui
|
|
139
|
+
```
|
|
140
|
+
|
|
141
|
+
A Tkinter window with tabs for **Connection → Export → Convert → Merge → Diagnose**, a progress bar, and a live log pane. All long tasks run on a background thread so the UI stays responsive. No extra dependencies — Tkinter ships with Python. (On some Linux distros you may need `sudo apt install python3-tk`.)
|
|
142
|
+
|
|
143
|
+
The status bar has a **⏹ Stop** button that cancels the current task gracefully — the runner finishes the page it's on, saves the lockfile, and shuts down cleanly (no torn HTTP connections, no half-written PDFs). Run buttons disable while a task is running and re-enable when it finishes or cancels. Window geometry is remembered between sessions in `~/.confluence-exporter-gui.json`.
|
|
144
|
+
|
|
145
|
+
The Connection tab shows a **welcome banner** while config is incomplete (it disappears once you've set base URL + space key + credentials), and the cookie textbox **parses cookies live as you paste** — you'll see "✓ 12 cookie(s) parsed — session token: cloud.session.token" before you even click Test connection.
|
|
146
|
+
|
|
147
|
+
### CLI
|
|
148
|
+
|
|
149
|
+
```bash
|
|
150
|
+
# Interactive menu (recommended first run)
|
|
151
|
+
confluence-exporter
|
|
152
|
+
|
|
153
|
+
# Short alias
|
|
154
|
+
cfx
|
|
155
|
+
```
|
|
156
|
+
|
|
157
|
+
The tool walks you through auth setup, target space, and format, and saves your choices to `config.json`. Subsequent runs re-use it.
|
|
158
|
+
|
|
159
|
+
Non-interactive / scripted:
|
|
160
|
+
|
|
161
|
+
```bash
|
|
162
|
+
cfx export --space MYKEY --format pdf --output ./out -y
|
|
163
|
+
cfx convert ./out --engine playwright --merge -y
|
|
164
|
+
cfx merge ./out_converted ./out_volumes --mode per_section -y
|
|
165
|
+
|
|
166
|
+
cfx diagnose # check installed engines + credentials
|
|
167
|
+
cfx init-config # edit / (re)create config.json
|
|
168
|
+
```
|
|
169
|
+
|
|
170
|
+
Press **Ctrl+C** during an export to cancel gracefully — the current page finishes, the lockfile is saved, and the next run picks up where you left off.
|
|
171
|
+
|
|
172
|
+
Run `cfx <command> --help` for all options.
|
|
173
|
+
|
|
174
|
+
### Library
|
|
175
|
+
|
|
176
|
+
```python
|
|
177
|
+
from pathlib import Path
|
|
178
|
+
from confluence_exporter import (
|
|
179
|
+
AppConfig, ConfluenceClient, SpaceExporter, OutputConverter, PDFMerger,
|
|
180
|
+
)
|
|
181
|
+
|
|
182
|
+
cfg = AppConfig()
|
|
183
|
+
cfg.confluence.base_url = "https://your-tenant.atlassian.net"
|
|
184
|
+
cfg.confluence.space_key = "ABC"
|
|
185
|
+
cfg.confluence.auth_mode = "api_token"
|
|
186
|
+
cfg.confluence.email = "you@example.com"
|
|
187
|
+
cfg.confluence.api_token = "…"
|
|
188
|
+
|
|
189
|
+
client = ConfluenceClient.from_config(cfg.confluence)
|
|
190
|
+
SpaceExporter(cfg, client).run()
|
|
191
|
+
|
|
192
|
+
OutputConverter(
|
|
193
|
+
output_root=Path(cfg.export.output_path),
|
|
194
|
+
target_format="pdf",
|
|
195
|
+
engine="auto",
|
|
196
|
+
).run()
|
|
197
|
+
|
|
198
|
+
PDFMerger(
|
|
199
|
+
source_root=Path(cfg.export.output_path + "_converted"),
|
|
200
|
+
dest_root=Path("./volumes"),
|
|
201
|
+
mode="per_section",
|
|
202
|
+
).run()
|
|
203
|
+
```
|
|
204
|
+
|
|
205
|
+
A full worked example — including progress callbacks and all three auth modes — is in [`examples/use_as_library.py`](examples/use_as_library.py).
|
|
206
|
+
|
|
207
|
+
**Programmatic cancellation** — pass any `threading.Event` and set it from another thread (or a signal handler) to bail out gracefully:
|
|
208
|
+
|
|
209
|
+
```python
|
|
210
|
+
import threading
|
|
211
|
+
cancel = threading.Event()
|
|
212
|
+
|
|
213
|
+
# Cancel after 30s if it isn't done by then
|
|
214
|
+
threading.Timer(30, cancel.set).start()
|
|
215
|
+
|
|
216
|
+
SpaceExporter(cfg, client, cancel_event=cancel).run()
|
|
217
|
+
# OutputConverter(..., cancel_event=cancel)
|
|
218
|
+
# PDFMerger(..., cancel_event=cancel)
|
|
219
|
+
```
|
|
220
|
+
|
|
221
|
+
Cancellation is **cooperative**: the runner checks the event between pages/files/groups and stops cleanly — no torn HTTP requests, no half-written PDFs.
|
|
222
|
+
|
|
223
|
+
---
|
|
224
|
+
|
|
225
|
+
## Three modes, end to end
|
|
226
|
+
|
|
227
|
+
### 1. **Export** — download a Confluence space
|
|
228
|
+
|
|
229
|
+
```bash
|
|
230
|
+
cfx export
|
|
231
|
+
```
|
|
232
|
+
|
|
233
|
+
Writes pages + attachments to the output folder:
|
|
234
|
+
|
|
235
|
+
```
|
|
236
|
+
output/
|
|
237
|
+
└── MySpace/
|
|
238
|
+
├── Overview/
|
|
239
|
+
│ ├── Introduction.html
|
|
240
|
+
│ └── Architecture.html
|
|
241
|
+
├── attachments/
|
|
242
|
+
│ ├── Introduction/
|
|
243
|
+
│ │ └── diagram.png
|
|
244
|
+
│ └── _flat/
|
|
245
|
+
│ └── 12345_long-attachment-name.pdf
|
|
246
|
+
└── _flat/ # pages whose path was too long for Windows
|
|
247
|
+
└── Nested_Deep_Page_9999.html
|
|
248
|
+
```
|
|
249
|
+
|
|
250
|
+
#### Incremental updates
|
|
251
|
+
|
|
252
|
+
Re-running `export` is **safe and fast**: it compares the live Confluence space against the per-space lockfile and only downloads what actually changed. Each page falls into one of four buckets:
|
|
253
|
+
|
|
254
|
+
| State | Meaning | Action |
|
|
255
|
+
| --- | --- | --- |
|
|
256
|
+
| **NEW** | Page exists in Confluence, not in the lockfile | Download |
|
|
257
|
+
| **UPDATED** | Newer version on Confluence (or local file is missing) | Re-download |
|
|
258
|
+
| **UNCHANGED** | Same version, file still on disk | Skip |
|
|
259
|
+
| **DELETED-UPSTREAM** | In the lockfile but no longer in Confluence | Optionally remove (`cleanup_stale: true`) |
|
|
260
|
+
|
|
261
|
+
To **preview** what an export would do — without downloading anything — use the `status` command:
|
|
262
|
+
|
|
263
|
+
```bash
|
|
264
|
+
cfx status # summary only
|
|
265
|
+
cfx status --titles # also list the actual page titles
|
|
266
|
+
cfx status --titles -n 50 # bump the per-bucket cap from 20 to 50
|
|
267
|
+
```
|
|
268
|
+
|
|
269
|
+
Sample output:
|
|
270
|
+
|
|
271
|
+
```
|
|
272
|
+
Diff
|
|
273
|
+
┏━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━┓
|
|
274
|
+
┃ Status ┃ # ┃
|
|
275
|
+
┡━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━┩
|
|
276
|
+
│ New │ 3 │
|
|
277
|
+
│ Updated │ 12 │
|
|
278
|
+
│ Unchanged │ 247 │
|
|
279
|
+
│ Deleted upstream │ 1 │
|
|
280
|
+
│ Total in Confluence │ 262 │
|
|
281
|
+
└───────────────────────┴──────┘
|
|
282
|
+
→ Running export would download 3 new + 12 updated page(s).
|
|
283
|
+
```
|
|
284
|
+
|
|
285
|
+
The GUI exposes the same thing as a "🔍 Check status" button on the Export tab.
|
|
286
|
+
|
|
287
|
+
> **Tip**: if you delete a PDF on disk, the next run notices and re-downloads it (the lockfile alone isn't trusted — we also check the file is actually there).
|
|
288
|
+
|
|
289
|
+
### 2. **Convert** — HTML → PDF / DOCX with embedded attachments
|
|
290
|
+
|
|
291
|
+
```bash
|
|
292
|
+
cfx convert ./output
|
|
293
|
+
```
|
|
294
|
+
|
|
295
|
+
Produces `./output_converted/` mirroring the source tree. Each PDF has its page's PDF attachments merged as appendix pages. The conversion tolerates deep Windows paths (renders via `%TEMP%` and moves into place) and validates every output via magic-byte + size checks — **no 0 KB files slip through**.
|
|
296
|
+
|
|
297
|
+
### 3. **Merge** — consolidated volumes for NotebookLM / archival
|
|
298
|
+
|
|
299
|
+
```bash
|
|
300
|
+
cfx merge ./output_converted ./output_volumes
|
|
301
|
+
```
|
|
302
|
+
|
|
303
|
+
Three grouping modes:
|
|
304
|
+
|
|
305
|
+
| Mode | Output | Best for |
|
|
306
|
+
| -------------- | ------------------------------------------------ | ------------------------------------------------------- |
|
|
307
|
+
| `per_section` | One PDF per top-level folder of each space | NotebookLM sources (smaller, focused) |
|
|
308
|
+
| `per_space` | One PDF per Confluence space | Sharing a whole space as a single file |
|
|
309
|
+
| `single` | One PDF for everything | Archival / grep-friendly single file |
|
|
310
|
+
|
|
311
|
+
Each volume contains a generated **Table of Contents** page (page numbers + nesting) and a **PDF outline** that every reader (Acrobat, Edge, Chrome, Foxit…) shows as a navigation sidebar.
|
|
312
|
+
|
|
313
|
+
---
|
|
314
|
+
|
|
315
|
+
## Authentication
|
|
316
|
+
|
|
317
|
+
### Option 1 — Atlassian API token (easiest, if your admin allows it)
|
|
318
|
+
|
|
319
|
+
```json
|
|
320
|
+
"auth_mode": "api_token",
|
|
321
|
+
"email": "you@company.com",
|
|
322
|
+
"api_token": "ATATT3x…"
|
|
323
|
+
```
|
|
324
|
+
|
|
325
|
+
Get a token at <https://id.atlassian.com/manage-profile/security/api-tokens>.
|
|
326
|
+
|
|
327
|
+
### Option 2 — Browser cookie (for SSO-only tenants)
|
|
328
|
+
|
|
329
|
+
```json
|
|
330
|
+
"auth_mode": "browser_cookie",
|
|
331
|
+
"cookies": {
|
|
332
|
+
"cloud.session.token": "eyJ…",
|
|
333
|
+
"atlassian.xsrf.token": "…"
|
|
334
|
+
}
|
|
335
|
+
```
|
|
336
|
+
|
|
337
|
+
Easiest way to set this up:
|
|
338
|
+
|
|
339
|
+
```bash
|
|
340
|
+
cfx init-config # or choose menu option 5
|
|
341
|
+
```
|
|
342
|
+
|
|
343
|
+
The tool walks you through copying the full `Cookie:` header from DevTools — it then parses and forwards every cookie the browser would send, so it doesn't matter whether your tenant uses `cloud.session.token`, `tenant.session.token`, `JSESSIONID` or something else.
|
|
344
|
+
|
|
345
|
+
### Option 3 — Personal Access Token (Server / Data Center)
|
|
346
|
+
|
|
347
|
+
```json
|
|
348
|
+
"auth_mode": "pat",
|
|
349
|
+
"personal_access_token": "NjAxM…"
|
|
350
|
+
```
|
|
351
|
+
|
|
352
|
+
Sent as `Authorization: Bearer …`.
|
|
353
|
+
|
|
354
|
+
---
|
|
355
|
+
|
|
356
|
+
## Configuration
|
|
357
|
+
|
|
358
|
+
A full `config.json` looks like [`examples/config.example.json`](examples/config.example.json). All fields have sane defaults and are overridable via CLI flags.
|
|
359
|
+
|
|
360
|
+
Keys starting with `_` are treated as inline documentation and ignored at load time.
|
|
361
|
+
|
|
362
|
+
---
|
|
363
|
+
|
|
364
|
+
## Standalone executable (no Python required for end users)
|
|
365
|
+
|
|
366
|
+
If you want to ship a double-clickable `.exe` / `.app` for users who don't have Python installed, bundle it with PyInstaller:
|
|
367
|
+
|
|
368
|
+
```bash
|
|
369
|
+
pip install "confluence-space-exporter[all]" pyinstaller
|
|
370
|
+
# Windows / macOS / Linux (run on the target OS; in Windows cmd.exe use ^ instead of \ for line continuation):
|
|
371
|
+
pyinstaller --name ConfluenceExporter --windowed --onefile \
|
|
372
|
+
--collect-all confluence_exporter \
|
|
373
|
+
-m confluence_exporter.gui
|
|
374
|
+
```
|
|
375
|
+
|
|
376
|
+
The resulting `dist/ConfluenceExporter.exe` (or `.app` on macOS) embeds Python and all dependencies. For the Playwright engine specifically, Chromium binaries are large and best installed separately after first launch — in a bundled build, prefer `weasyprint` or `xhtml2pdf` out of the box.
|
|
377
|
+
|
|
378
|
+
---
|
|
379
|
+
|
|
380
|
+
## Troubleshooting
|
|
381
|
+
|
|
382
|
+
| Symptom | Likely cause / fix |
|
|
383
|
+
| ------------------------------------------------ | ----------------------------------------------------------------------------------------------------------------------------- |
|
|
384
|
+
| `HTTP 401 Unauthorized` | API token disabled by admin → switch to `browser_cookie` mode. |
|
|
385
|
+
| `HTTP 403 Forbidden` when exporting PDFs | Confluence's native PDF endpoint is disabled; the tool will fall through to local rendering automatically. |
|
|
386
|
+
| `Playwright Chromium binary not installed` | `python -m playwright install chromium`. |
|
|
387
|
+
| `[Errno 2] No such file or directory` (Windows) | Path exceeds MAX_PATH. The tool falls back to `_flat/` automatically — enable it in your run. |
|
|
388
|
+
| 0 KB PDFs | The older engine couldn't render a page. Install Playwright and rerun: `cfx convert --engine playwright`. |
|
|
389
|
+
|
|
390
|
+
Run `cfx diagnose` any time to see what's installed and confirm your credentials.
|
|
391
|
+
|
|
392
|
+
---
|
|
393
|
+
|
|
394
|
+
## Contributing
|
|
395
|
+
|
|
396
|
+
See [CONTRIBUTING.md](CONTRIBUTING.md). PRs welcome!
|
|
397
|
+
|
|
398
|
+
---
|
|
399
|
+
|
|
400
|
+
## License
|
|
401
|
+
|
|
402
|
+
MIT — see [LICENSE](LICENSE).
|