confluence-exporter 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.eslintrc.cjs +18 -0
- package/.github/copilot-instructions.md +3 -0
- package/.github/prompts/analyze.prompt.md +101 -0
- package/.github/prompts/clarify.prompt.md +158 -0
- package/.github/prompts/constitution.prompt.md +73 -0
- package/.github/prompts/implement.prompt.md +56 -0
- package/.github/prompts/plan.prompt.md +50 -0
- package/.github/prompts/specify.prompt.md +21 -0
- package/.github/prompts/tasks.prompt.md +69 -0
- package/LICENSE +21 -0
- package/README.md +332 -0
- package/agents.md +1174 -0
- package/dist/api.d.ts +73 -0
- package/dist/api.js +387 -0
- package/dist/api.js.map +1 -0
- package/dist/commands/download.command.d.ts +18 -0
- package/dist/commands/download.command.js +257 -0
- package/dist/commands/download.command.js.map +1 -0
- package/dist/commands/executor.d.ts +22 -0
- package/dist/commands/executor.js +52 -0
- package/dist/commands/executor.js.map +1 -0
- package/dist/commands/help.command.d.ts +8 -0
- package/dist/commands/help.command.js +68 -0
- package/dist/commands/help.command.js.map +1 -0
- package/dist/commands/index.command.d.ts +14 -0
- package/dist/commands/index.command.js +95 -0
- package/dist/commands/index.command.js.map +1 -0
- package/dist/commands/index.d.ts +13 -0
- package/dist/commands/index.js +13 -0
- package/dist/commands/index.js.map +1 -0
- package/dist/commands/plan.command.d.ts +54 -0
- package/dist/commands/plan.command.js +272 -0
- package/dist/commands/plan.command.js.map +1 -0
- package/dist/commands/registry.d.ts +12 -0
- package/dist/commands/registry.js +32 -0
- package/dist/commands/registry.js.map +1 -0
- package/dist/commands/transform.command.d.ts +69 -0
- package/dist/commands/transform.command.js +951 -0
- package/dist/commands/transform.command.js.map +1 -0
- package/dist/commands/types.d.ts +12 -0
- package/dist/commands/types.js +5 -0
- package/dist/commands/types.js.map +1 -0
- package/dist/commands/update.command.d.ts +10 -0
- package/dist/commands/update.command.js +201 -0
- package/dist/commands/update.command.js.map +1 -0
- package/dist/constants.d.ts +1 -0
- package/dist/constants.js +2 -0
- package/dist/constants.js.map +1 -0
- package/dist/index.d.ts +5 -0
- package/dist/index.js +110 -0
- package/dist/index.js.map +1 -0
- package/dist/logger.d.ts +15 -0
- package/dist/logger.js +52 -0
- package/dist/logger.js.map +1 -0
- package/dist/types.d.ts +167 -0
- package/dist/types.js +5 -0
- package/dist/types.js.map +1 -0
- package/dist/utils.d.ts +56 -0
- package/dist/utils.js +178 -0
- package/dist/utils.js.map +1 -0
- package/eslint.config.js +29 -0
- package/jest.config.cjs +25 -0
- package/migrate-meta.js +132 -0
- package/package.json +53 -0
- package/src/api.ts +469 -0
- package/src/commands/download.command.ts +324 -0
- package/src/commands/executor.ts +62 -0
- package/src/commands/help.command.ts +72 -0
- package/src/commands/index.command.ts +111 -0
- package/src/commands/index.ts +14 -0
- package/src/commands/plan.command.ts +318 -0
- package/src/commands/registry.ts +39 -0
- package/src/commands/transform.command.ts +1103 -0
- package/src/commands/types.ts +16 -0
- package/src/commands/update.command.ts +229 -0
- package/src/constants.ts +0 -0
- package/src/index.ts +120 -0
- package/src/logger.ts +60 -0
- package/src/test.sh +66 -0
- package/src/types.ts +176 -0
- package/src/utils.ts +204 -0
- package/tests/commands/README.md +123 -0
- package/tests/commands/download.command.test.ts +8 -0
- package/tests/commands/help.command.test.ts +8 -0
- package/tests/commands/index.command.test.ts +8 -0
- package/tests/commands/plan.command.test.ts +15 -0
- package/tests/commands/transform.command.test.ts +8 -0
- package/tests/fixtures/_index.yaml +38 -0
- package/tests/fixtures/mock-pages.ts +62 -0
- package/tsconfig.json +25 -0
- package/vite.config.ts +45 -0
package/README.md
ADDED
|
@@ -0,0 +1,332 @@
|
|
|
1
|
+
# Minimal Confluence to Markdown Exporter
|
|
2
|
+
|
|
3
|
+
A lightweight, standalone CLI tool to export Confluence spaces to Markdown files with hierarchical folder structure.
|
|
4
|
+
|
|
5
|
+
## Installation
|
|
6
|
+
|
|
7
|
+
### Via npx (recommended)
|
|
8
|
+
```bash
|
|
9
|
+
npx confluence-exporter <command> [options]
|
|
10
|
+
```
|
|
11
|
+
|
|
12
|
+
### Via npm global install
|
|
13
|
+
```bash
|
|
14
|
+
npm install -g confluence-exporter
|
|
15
|
+
confluence-export <command> [options]
|
|
16
|
+
```
|
|
17
|
+
|
|
18
|
+
### From source
|
|
19
|
+
```bash
|
|
20
|
+
git clone <repo>
|
|
21
|
+
cd confluence-exporter
|
|
22
|
+
npm install
|
|
23
|
+
npm run build
|
|
24
|
+
node dist/index.js <command> [options]
|
|
25
|
+
```
|
|
26
|
+
|
|
27
|
+
## Features
|
|
28
|
+
|
|
29
|
+
- 🚀 Minimal dependencies (uses native Node.js fetch)
|
|
30
|
+
- 📄 Command-based CLI with six commands: `help`, `index`, `update`, `plan`, `download`, `transform`
|
|
31
|
+
- 🔄 Four-phase export workflow (indexing → planning → downloading → transforming)
|
|
32
|
+
- 📁 Hierarchical folder structure based on page tree (mirrors Confluence hierarchy)
|
|
33
|
+
- 📝 Separate HTML download and Markdown transformation for flexibility
|
|
34
|
+
- 🔗 HTML to Markdown transformation with Confluence macro support
|
|
35
|
+
- 👤 User link resolution with intelligent caching
|
|
36
|
+
- 📎 Image/attachment downloading with automatic slugification
|
|
37
|
+
- 💾 YAML-based indexing with resume capability
|
|
38
|
+
- ✨ Prettier formatting for consistent output
|
|
39
|
+
|
|
40
|
+
## Prerequisites
|
|
41
|
+
|
|
42
|
+
- Node.js 18+ (for native fetch support)
|
|
43
|
+
|
|
44
|
+
## Usage
|
|
45
|
+
|
|
46
|
+
```bash
|
|
47
|
+
npx confluence-exporter <command> [options]
|
|
48
|
+
```
|
|
49
|
+
|
|
50
|
+
Or, if installed globally:
|
|
51
|
+
|
|
52
|
+
```bash
|
|
53
|
+
confluence-export <command> [options]
|
|
54
|
+
```
|
|
55
|
+
|
|
56
|
+
### Commands
|
|
57
|
+
|
|
58
|
+
- `help` - Display usage information
|
|
59
|
+
- `index` - Create page inventory (`_index.yaml`)
|
|
60
|
+
- `update` - Check for new/updated pages and update `_index.yaml`
|
|
61
|
+
- `plan` - Create download queue and tree structure (`_queue.yaml` + `_tree.yaml`)
|
|
62
|
+
- `download` - Download HTML pages from queue
|
|
63
|
+
- `transform` - Transform HTML files to Markdown (skips existing MD files, creates links structure)
|
|
64
|
+
|
|
65
|
+
Commands can be chained to run in sequence:
|
|
66
|
+
```bash
|
|
67
|
+
node dist/index.js index plan download transform [options]
|
|
68
|
+
```
|
|
69
|
+
|
|
70
|
+
### Options
|
|
71
|
+
|
|
72
|
+
| Flag | Long Form | Description | Default |
|
|
73
|
+
|------|-----------|-------------|---------|
|
|
74
|
+
| `-u` | `--url` | Confluence base URL | env: `CONFLUENCE_BASE_URL` |
|
|
75
|
+
| `-n` | `--username` | Username/email | env: `CONFLUENCE_USERNAME` |
|
|
76
|
+
| `-p` | `--password` | API token | env: `CONFLUENCE_PASSWORD` |
|
|
77
|
+
| `-s` | `--space` | Space key | env: `CONFLUENCE_SPACE_KEY` |
|
|
78
|
+
| `-o` | `--output` | Output directory | `./output` or env: `OUTPUT_DIR` |
|
|
79
|
+
| `-i` | `--pageId` | Single page ID (optional) | none |
|
|
80
|
+
| `-l` | `--limit` | Limit number of pages to process | none |
|
|
81
|
+
| | `--parallel` | Number of concurrent operations | `5` |
|
|
82
|
+
| `-f` | `--force` | Force re-download of all pages (skip version check) | false |
|
|
83
|
+
| | `--clear` | Clear existing MD files and images before transforming | false |
|
|
84
|
+
| | `--pageSize` | API page size | `25` |
|
|
85
|
+
| `-h` | `--help` | Show help message | |
|
|
86
|
+
|
|
87
|
+
### Environment Variables
|
|
88
|
+
|
|
89
|
+
- `CONFLUENCE_BASE_URL`
|
|
90
|
+
- `CONFLUENCE_USERNAME`
|
|
91
|
+
- `CONFLUENCE_PASSWORD`
|
|
92
|
+
- `CONFLUENCE_SPACE_KEY`
|
|
93
|
+
- `OUTPUT_DIR`
|
|
94
|
+
|
|
95
|
+
## Examples
|
|
96
|
+
|
|
97
|
+
### Full Space Export (4-phase workflow)
|
|
98
|
+
```bash
|
|
99
|
+
node dist/index.js index plan download transform -u https://mysite.atlassian.net -n user@example.com -p token -s MYSPACE -o ./output
|
|
100
|
+
```
|
|
101
|
+
|
|
102
|
+
### Full Space Export with Limit (process first 10 pages only)
|
|
103
|
+
```bash
|
|
104
|
+
node dist/index.js index plan download transform -u https://mysite.atlassian.net -n user@example.com -p token -s MYSPACE -o ./output -l 10
|
|
105
|
+
```
|
|
106
|
+
|
|
107
|
+
### Create Index Only (Phase 1)
|
|
108
|
+
```bash
|
|
109
|
+
node dist/index.js index -u https://mysite.atlassian.net -n user@example.com -p token -s MYSPACE
|
|
110
|
+
```
|
|
111
|
+
|
|
112
|
+
### Check for New/Updated Pages and Update Existing Index
|
|
113
|
+
```bash
|
|
114
|
+
node dist/index.js update -u https://mysite.atlassian.net -n user@example.com -p token -s MYSPACE
|
|
115
|
+
```
|
|
116
|
+
|
|
117
|
+
### Create Download Queue from Existing Index (Phase 2)
|
|
118
|
+
```bash
|
|
119
|
+
node dist/index.js plan -u https://mysite.atlassian.net -n user@example.com -p token -s MYSPACE
|
|
120
|
+
```
|
|
121
|
+
|
|
122
|
+
### Create Download Queue for Specific Page and All Children
|
|
123
|
+
```bash
|
|
124
|
+
node dist/index.js plan -i 123456789 -u https://mysite.atlassian.net -n user@example.com -p token -s MYSPACE
|
|
125
|
+
```
|
|
126
|
+
|
|
127
|
+
### Force Re-download All Pages (ignore version check)
|
|
128
|
+
```bash
|
|
129
|
+
node dist/index.js plan --force -u https://mysite.atlassian.net -n user@example.com -p token -s MYSPACE
|
|
130
|
+
```
|
|
131
|
+
|
|
132
|
+
### Download HTML Pages from Existing Queue (Phase 3)
|
|
133
|
+
```bash
|
|
134
|
+
node dist/index.js download -u https://mysite.atlassian.net -n user@example.com -p token -s MYSPACE
|
|
135
|
+
```
|
|
136
|
+
|
|
137
|
+
### Transform HTML to Markdown (Phase 4)
|
|
138
|
+
```bash
|
|
139
|
+
node dist/index.js transform -u https://mysite.atlassian.net -n user@example.com -p token -s MYSPACE
|
|
140
|
+
```
|
|
141
|
+
|
|
142
|
+
### Transform HTML to Markdown with Clear (remove existing MD files first)
|
|
143
|
+
```bash
|
|
144
|
+
node dist/index.js transform --clear -u https://mysite.atlassian.net -n user@example.com -p token -s MYSPACE
|
|
145
|
+
```
|
|
146
|
+
|
|
147
|
+
### Download and Transform Together
|
|
148
|
+
```bash
|
|
149
|
+
node dist/index.js download transform -u https://mysite.atlassian.net -n user@example.com -p token -s MYSPACE
|
|
150
|
+
```
|
|
151
|
+
|
|
152
|
+
### Download and Transform with Higher Concurrency
|
|
153
|
+
```bash
|
|
154
|
+
node dist/index.js download transform --parallel 10 -u https://mysite.atlassian.net -n user@example.com -p token -s MYSPACE
|
|
155
|
+
```
|
|
156
|
+
|
|
157
|
+
### Download Single Page HTML Only (no index/plan needed)
|
|
158
|
+
```bash
|
|
159
|
+
node dist/index.js download -i 123456789 -u https://mysite.atlassian.net -n user@example.com -p token -s MYSPACE
|
|
160
|
+
```
|
|
161
|
+
|
|
162
|
+
## Transform Command Details
|
|
163
|
+
|
|
164
|
+
The `transform` command converts downloaded HTML files from Confluence into Markdown format with the following features:
|
|
165
|
+
|
|
166
|
+
### Key Features
|
|
167
|
+
- **HTML to Markdown Conversion**: Handles Confluence-specific elements like macros (code blocks, panels, user links), images, headers, lists, and links
|
|
168
|
+
- **Image Handling**: Downloads attachments referenced in Confluence image tags and saves them in `images/` subdirectories
|
|
169
|
+
- **Macro Support**: Transforms Confluence macros (e.g., `list-children` fetches child pages, `code` blocks become fenced code, panels become blockquotes)
|
|
170
|
+
- **User Link Resolution**: Converts Confluence user links to `@displayName` format using API calls
|
|
171
|
+
- **Cleanup and Formatting**: Removes HTML tags, entities, and malformed Markdown patterns; formats output with Prettier
|
|
172
|
+
- **Links Structure**: Creates a `links/` folder with symlinks to all MD files and a `_links.md` file showing a hierarchical tree
|
|
173
|
+
- **Resume Capability**: Skips existing Markdown files to allow incremental runs
|
|
174
|
+
- **Error Handling**: Non-fatal errors (e.g., failed image downloads) are logged as warnings
|
|
175
|
+
|
|
176
|
+
### Options
|
|
177
|
+
- `--clear`: Remove existing MD files and images folders before transforming (useful for re-processing)
|
|
178
|
+
- `--limit <number>`: Process only the first N HTML files
|
|
179
|
+
|
|
180
|
+
### Output
|
|
181
|
+
- Markdown files with YAML front matter (title, ID, URL, version, parentId)
|
|
182
|
+
- Downloaded images in `images/` subdirectories per page
|
|
183
|
+
- `links/` folder with symlinks and hierarchical index (`_links.md`)
|
|
184
|
+
|
|
185
|
+
## Output Structure
|
|
186
|
+
|
|
187
|
+
### Hierarchical Structure (when `_tree.yaml` exists)
|
|
188
|
+
```
|
|
189
|
+
outputDir/
|
|
190
|
+
├── _index.yaml # Page index (YAML array)
|
|
191
|
+
├── _queue.yaml # Download queue (YAML array)
|
|
192
|
+
├── _tree.yaml # Hierarchical page tree structure
|
|
193
|
+
└── MYSPACE/ # Root folder (space key)
|
|
194
|
+
├── 123456-page-title.html
|
|
195
|
+
├── 123456-page-title.md
|
|
196
|
+
└── 123456-page-title/ # Folder for children
|
|
197
|
+
├── images/ # Images for child pages
|
|
198
|
+
│ └── logo.png
|
|
199
|
+
├── 789012-child-page.html
|
|
200
|
+
├── 789012-child-page.md
|
|
201
|
+
└── 789012-child-page/ # Nested children
|
|
202
|
+
├── 345678-grandchild.html
|
|
203
|
+
└── 345678-grandchild.md
|
|
204
|
+
```
|
|
205
|
+
|
|
206
|
+
### Flat Structure (fallback when only `_queue.yaml` exists)
|
|
207
|
+
```
|
|
208
|
+
outputDir/
|
|
209
|
+
├── _index.yaml # Page index (YAML array)
|
|
210
|
+
├── _queue.yaml # Download queue (YAML array)
|
|
211
|
+
├── page-title-1.md # Formatted markdown
|
|
212
|
+
├── page-title-1.html # Original HTML (formatted)
|
|
213
|
+
├── page-title-2.md
|
|
214
|
+
├── page-title-2.html
|
|
215
|
+
└── images/ # Shared images folder
|
|
216
|
+
├── image-1.png
|
|
217
|
+
└── image-2.jpg
|
|
218
|
+
```
|
|
219
|
+
|
|
220
|
+
## Front Matter Format
|
|
221
|
+
|
|
222
|
+
```yaml
|
|
223
|
+
---
|
|
224
|
+
title: "Page Title"
|
|
225
|
+
id: "123456789"
|
|
226
|
+
url: "https://mysite.atlassian.net/pages/viewpage.action?pageId=123456789"
|
|
227
|
+
version: 5
|
|
228
|
+
parentId: "987654321"
|
|
229
|
+
---
|
|
230
|
+
```
|
|
231
|
+
|
|
232
|
+
## Prettier Formatting
|
|
233
|
+
|
|
234
|
+
**Markdown:**
|
|
235
|
+
- `printWidth: 120`
|
|
236
|
+
- `proseWrap: 'preserve'` (don't reflow text)
|
|
237
|
+
- `tabWidth: 2`
|
|
238
|
+
|
|
239
|
+
**HTML:**
|
|
240
|
+
- `printWidth: 120`
|
|
241
|
+
- `htmlWhitespaceSensitivity: 'ignore'`
|
|
242
|
+
- Consistent 2-space indentation
|
|
243
|
+
|
|
244
|
+
Formatting failures are non-fatal: the file is saved unformatted and a warning is logged.
|
|
245
|
+
|
|
246
|
+
## Project Structure
|
|
247
|
+
|
|
248
|
+
```
|
|
249
|
+
src/
|
|
250
|
+
├── index.ts # CLI entry point (arg parsing, config validation)
|
|
251
|
+
├── types.ts # TypeScript type definitions
|
|
252
|
+
├── api.ts # Confluence REST API client
|
|
253
|
+
├── logger.ts # Logging module
|
|
254
|
+
├── utils.ts # Shared utility functions
|
|
255
|
+
└── commands/ # Command handlers (modular architecture)
|
|
256
|
+
├── types.ts # Command-related type definitions
|
|
257
|
+
├── help.command.ts # Help command handler
|
|
258
|
+
├── index.command.ts # Index command handler
|
|
259
|
+
├── update.command.ts # Update command handler
|
|
260
|
+
├── plan.command.ts # Plan command handler
|
|
261
|
+
├── download.command.ts # Download command handler (HTML only)
|
|
262
|
+
├── transform.command.ts # Transform command handler (HTML → MD)
|
|
263
|
+
├── registry.ts # Command registry (maps commands to handlers)
|
|
264
|
+
├── executor.ts # Command executor (orchestrates execution)
|
|
265
|
+
└── index.ts # Exports for easy importing
|
|
266
|
+
```
|
|
267
|
+
|
|
268
|
+
## User Link Resolution
|
|
269
|
+
|
|
270
|
+
The exporter automatically resolves Confluence user links to display names:
|
|
271
|
+
|
|
272
|
+
```html
|
|
273
|
+
<!-- Confluence HTML -->
|
|
274
|
+
<ac:link><ri:user ri:username="john.doe"/></ac:link>
|
|
275
|
+
<ac:link><ri:user ri:userkey="ff8080817b0a1234"/></ac:link>
|
|
276
|
+
```
|
|
277
|
+
|
|
278
|
+
Becomes:
|
|
279
|
+
|
|
280
|
+
```markdown
|
|
281
|
+
@John Doe
|
|
282
|
+
@John Doe
|
|
283
|
+
```
|
|
284
|
+
|
|
285
|
+
Features:
|
|
286
|
+
- ✓ Resolves by username or userkey
|
|
287
|
+
- ✓ Caches user lookups to minimize API calls
|
|
288
|
+
- ✓ Falls back to the username if the API call fails
|
|
289
|
+
- ✓ Handles unknown users gracefully
|
|
290
|
+
|
|
291
|
+
## Development
|
|
292
|
+
|
|
293
|
+
### Build & Run
|
|
294
|
+
|
|
295
|
+
```bash
|
|
296
|
+
# Build TypeScript
|
|
297
|
+
npm run build # Uses Vite
|
|
298
|
+
npm run build:tsc # Uses tsc directly
|
|
299
|
+
|
|
300
|
+
# Run compiled
|
|
301
|
+
npm start -- [args]
|
|
302
|
+
|
|
303
|
+
# Development mode
|
|
304
|
+
npm run dev -- [args] # Run once
|
|
305
|
+
npm run dev:watch -- [args] # Watch mode
|
|
306
|
+
```
|
|
307
|
+
|
|
308
|
+
### Testing
|
|
309
|
+
|
|
310
|
+
```bash
|
|
311
|
+
npm test # Run all tests
|
|
312
|
+
npm run test:watch # Watch mode
|
|
313
|
+
npm run test:coverage # With coverage
|
|
314
|
+
```
|
|
315
|
+
|
|
316
|
+
### Linting & Type Checking
|
|
317
|
+
|
|
318
|
+
```bash
|
|
319
|
+
npm run lint # ESLint
|
|
320
|
+
npm run typecheck # TypeScript --noEmit
|
|
321
|
+
```
|
|
322
|
+
|
|
323
|
+
### Cleaning
|
|
324
|
+
|
|
325
|
+
```bash
|
|
326
|
+
npm run clean # Remove dist/
|
|
327
|
+
npm run rebuild # Clean + build
|
|
328
|
+
```
|
|
329
|
+
|
|
330
|
+
## License
|
|
331
|
+
|
|
332
|
+
Same as the parent project; see the `LICENSE` file included with this package.
|