confluence-exporter 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (91) hide show
  1. package/.eslintrc.cjs +18 -0
  2. package/.github/copilot-instructions.md +3 -0
  3. package/.github/prompts/analyze.prompt.md +101 -0
  4. package/.github/prompts/clarify.prompt.md +158 -0
  5. package/.github/prompts/constitution.prompt.md +73 -0
  6. package/.github/prompts/implement.prompt.md +56 -0
  7. package/.github/prompts/plan.prompt.md +50 -0
  8. package/.github/prompts/specify.prompt.md +21 -0
  9. package/.github/prompts/tasks.prompt.md +69 -0
  10. package/LICENSE +21 -0
  11. package/README.md +332 -0
  12. package/agents.md +1174 -0
  13. package/dist/api.d.ts +73 -0
  14. package/dist/api.js +387 -0
  15. package/dist/api.js.map +1 -0
  16. package/dist/commands/download.command.d.ts +18 -0
  17. package/dist/commands/download.command.js +257 -0
  18. package/dist/commands/download.command.js.map +1 -0
  19. package/dist/commands/executor.d.ts +22 -0
  20. package/dist/commands/executor.js +52 -0
  21. package/dist/commands/executor.js.map +1 -0
  22. package/dist/commands/help.command.d.ts +8 -0
  23. package/dist/commands/help.command.js +68 -0
  24. package/dist/commands/help.command.js.map +1 -0
  25. package/dist/commands/index.command.d.ts +14 -0
  26. package/dist/commands/index.command.js +95 -0
  27. package/dist/commands/index.command.js.map +1 -0
  28. package/dist/commands/index.d.ts +13 -0
  29. package/dist/commands/index.js +13 -0
  30. package/dist/commands/index.js.map +1 -0
  31. package/dist/commands/plan.command.d.ts +54 -0
  32. package/dist/commands/plan.command.js +272 -0
  33. package/dist/commands/plan.command.js.map +1 -0
  34. package/dist/commands/registry.d.ts +12 -0
  35. package/dist/commands/registry.js +32 -0
  36. package/dist/commands/registry.js.map +1 -0
  37. package/dist/commands/transform.command.d.ts +69 -0
  38. package/dist/commands/transform.command.js +951 -0
  39. package/dist/commands/transform.command.js.map +1 -0
  40. package/dist/commands/types.d.ts +12 -0
  41. package/dist/commands/types.js +5 -0
  42. package/dist/commands/types.js.map +1 -0
  43. package/dist/commands/update.command.d.ts +10 -0
  44. package/dist/commands/update.command.js +201 -0
  45. package/dist/commands/update.command.js.map +1 -0
  46. package/dist/constants.d.ts +1 -0
  47. package/dist/constants.js +2 -0
  48. package/dist/constants.js.map +1 -0
  49. package/dist/index.d.ts +5 -0
  50. package/dist/index.js +110 -0
  51. package/dist/index.js.map +1 -0
  52. package/dist/logger.d.ts +15 -0
  53. package/dist/logger.js +52 -0
  54. package/dist/logger.js.map +1 -0
  55. package/dist/types.d.ts +167 -0
  56. package/dist/types.js +5 -0
  57. package/dist/types.js.map +1 -0
  58. package/dist/utils.d.ts +56 -0
  59. package/dist/utils.js +178 -0
  60. package/dist/utils.js.map +1 -0
  61. package/eslint.config.js +29 -0
  62. package/jest.config.cjs +25 -0
  63. package/migrate-meta.js +132 -0
  64. package/package.json +53 -0
  65. package/src/api.ts +469 -0
  66. package/src/commands/download.command.ts +324 -0
  67. package/src/commands/executor.ts +62 -0
  68. package/src/commands/help.command.ts +72 -0
  69. package/src/commands/index.command.ts +111 -0
  70. package/src/commands/index.ts +14 -0
  71. package/src/commands/plan.command.ts +318 -0
  72. package/src/commands/registry.ts +39 -0
  73. package/src/commands/transform.command.ts +1103 -0
  74. package/src/commands/types.ts +16 -0
  75. package/src/commands/update.command.ts +229 -0
  76. package/src/constants.ts +0 -0
  77. package/src/index.ts +120 -0
  78. package/src/logger.ts +60 -0
  79. package/src/test.sh +66 -0
  80. package/src/types.ts +176 -0
  81. package/src/utils.ts +204 -0
  82. package/tests/commands/README.md +123 -0
  83. package/tests/commands/download.command.test.ts +8 -0
  84. package/tests/commands/help.command.test.ts +8 -0
  85. package/tests/commands/index.command.test.ts +8 -0
  86. package/tests/commands/plan.command.test.ts +15 -0
  87. package/tests/commands/transform.command.test.ts +8 -0
  88. package/tests/fixtures/_index.yaml +38 -0
  89. package/tests/fixtures/mock-pages.ts +62 -0
  90. package/tsconfig.json +25 -0
  91. package/vite.config.ts +45 -0
package/README.md ADDED
@@ -0,0 +1,332 @@
1
+ # Minimal Confluence to Markdown Exporter
2
+
3
+ A lightweight, standalone CLI tool to export Confluence spaces to Markdown files with hierarchical folder structure.
4
+
5
+ ## Installation
6
+
7
+ ### Via npx (recommended)
8
+ ```bash
9
+ npx confluence-exporter <command> [options]
10
+ ```
11
+
12
+ ### Via npm global install
13
+ ```bash
14
+ npm install -g confluence-exporter
15
+ confluence-export <command> [options]
16
+ ```
17
+
18
+ ### From source
19
+ ```bash
20
+ git clone <repo>
21
+ cd confluence-exporter
22
+ npm install
23
+ npm run build
24
+ node dist/index.js <command> [options]
25
+ ```
26
+
27
+ ## Features
28
+
29
+ - 🚀 Minimal dependencies (uses native Node.js fetch)
30
+ - 📄 Command-based CLI with six commands: `help`, `index`, `update`, `plan`, `download`, `transform`
31
+ - 🔄 Four-phase export workflow (indexing → planning → downloading → transforming)
32
+ - 📁 Hierarchical folder structure based on page tree (mirrors Confluence hierarchy)
33
+ - 📝 Separate HTML download and Markdown transformation for flexibility
34
+ - 🔗 HTML to Markdown transformation with Confluence macro support
35
+ - 👤 User link resolution with intelligent caching
36
+ - 📎 Image/attachment downloading with automatic slugification
37
+ - 💾 YAML-based indexing with resume capability
38
+ - ✨ Prettier formatting for consistent output
39
+
40
+ ## Prerequisites
41
+
42
+ - Node.js 18+ (for native fetch support)
43
+
44
+ ## Usage
45
+
46
+ ```bash
47
+ npx confluence-exporter <command> [options]
48
+ ```
49
+
50
+ or if installed globally:
51
+
52
+ ```bash
53
+ confluence-export <command> [options]
54
+ ```
55
+
56
+ ### Commands
57
+
58
+ - `help` - Display usage information
59
+ - `index` - Create page inventory (`_index.yaml`)
60
+ - `update` - Check for new/updated pages and update `_index.yaml`
61
+ - `plan` - Create download queue and tree structure (`_queue.yaml` + `_tree.yaml`)
62
+ - `download` - Download HTML pages from queue
63
+ - `transform` - Transform HTML files to Markdown (skips existing MD files, creates links structure)
64
+
65
+ Commands can be chained to run in sequence:
66
+ ```bash
67
+ node dist/index.js index plan download transform [options]
68
+ ```
69
+
70
+ ### Options
71
+
72
+ | Flag | Long Form | Description | Default |
73
+ |------|-----------|-------------|---------|
74
+ | `-u` | `--url` | Confluence base URL | env: `CONFLUENCE_BASE_URL` |
75
+ | `-n` | `--username` | Username/email | env: `CONFLUENCE_USERNAME` |
76
+ | `-p` | `--password` | API token | env: `CONFLUENCE_PASSWORD` |
77
+ | `-s` | `--space` | Space key | env: `CONFLUENCE_SPACE_KEY` |
78
+ | `-o` | `--output` | Output directory | `./output` or env: `OUTPUT_DIR` |
79
+ | `-i` | `--pageId` | Single page ID (optional) | none |
80
+ | `-l` | `--limit` | Limit number of pages to process | none |
81
+ | | `--parallel` | Number of concurrent operations | `5` |
82
+ | `-f` | `--force` | Force re-download of all pages (skip version check) | false |
83
+ | | `--clear` | Clear existing MD files and images before transforming | false |
84
+ | | `--pageSize` | API page size | `25` |
85
+ | `-h` | `--help` | Show help message | |
86
+
87
+ ### Environment Variables
88
+
89
+ - `CONFLUENCE_BASE_URL`
90
+ - `CONFLUENCE_USERNAME`
91
+ - `CONFLUENCE_PASSWORD`
92
+ - `CONFLUENCE_SPACE_KEY`
93
+ - `OUTPUT_DIR`
94
+
95
+ ## Examples
96
+
97
+ ### Full Space Export (4-phase workflow)
98
+ ```bash
99
+ node dist/index.js index plan download transform -u https://mysite.atlassian.net -n user@example.com -p token -s MYSPACE -o ./output
100
+ ```
101
+
102
+ ### Full Space Export with Limit (process first 10 pages only)
103
+ ```bash
104
+ node dist/index.js index plan download transform -u https://mysite.atlassian.net -n user@example.com -p token -s MYSPACE -o ./output -l 10
105
+ ```
106
+
107
+ ### Create Index Only (Phase 1)
108
+ ```bash
109
+ node dist/index.js index -u https://mysite.atlassian.net -n user@example.com -p token -s MYSPACE
110
+ ```
111
+
112
+ ### Check for New/Updated Pages and Update Existing Index
113
+ ```bash
114
+ node dist/index.js update -u https://mysite.atlassian.net -n user@example.com -p token -s MYSPACE
115
+ ```
116
+
117
+ ### Create Download Queue from Existing Index (Phase 2)
118
+ ```bash
119
+ node dist/index.js plan -u https://mysite.atlassian.net -n user@example.com -p token -s MYSPACE
120
+ ```
121
+
122
+ ### Create Download Queue for Specific Page and All Children
123
+ ```bash
124
+ node dist/index.js plan -i 123456789 -u https://mysite.atlassian.net -n user@example.com -p token -s MYSPACE
125
+ ```
126
+
127
+ ### Force Re-download All Pages (ignore version check)
128
+ ```bash
129
+ node dist/index.js plan --force -u https://mysite.atlassian.net -n user@example.com -p token -s MYSPACE
130
+ ```
131
+
132
+ ### Download HTML Pages from Existing Queue (Phase 3)
133
+ ```bash
134
+ node dist/index.js download -u https://mysite.atlassian.net -n user@example.com -p token -s MYSPACE
135
+ ```
136
+
137
+ ### Transform HTML to Markdown (Phase 4)
138
+ ```bash
139
+ node dist/index.js transform -u https://mysite.atlassian.net -n user@example.com -p token -s MYSPACE
140
+ ```
141
+
142
+ ### Transform HTML to Markdown with Clear (remove existing MD files first)
143
+ ```bash
144
+ node dist/index.js transform --clear -u https://mysite.atlassian.net -n user@example.com -p token -s MYSPACE
145
+ ```
146
+
147
+ ### Download and Transform Together
148
+ ```bash
149
+ node dist/index.js download transform -u https://mysite.atlassian.net -n user@example.com -p token -s MYSPACE
150
+ ```
151
+
152
+ ### Download and Transform with Higher Concurrency
153
+ ```bash
154
+ node dist/index.js download transform --parallel 10 -u https://mysite.atlassian.net -n user@example.com -p token -s MYSPACE
155
+ ```
156
+
157
+ ### Download Single Page HTML Only (no index/plan needed)
158
+ ```bash
159
+ node dist/index.js download -i 123456789 -u https://mysite.atlassian.net -n user@example.com -p token -s MYSPACE
160
+ ```
161
+
162
+ ## Transform Command Details
163
+
164
+ The `transform` command converts downloaded HTML files from Confluence into Markdown format with the following features:
165
+
166
+ ### Key Features
167
+ - **HTML to Markdown Conversion**: Handles Confluence-specific elements like macros (code blocks, panels, user links), images, headers, lists, and links
168
+ - **Image Handling**: Downloads attachments referenced in Confluence image tags and saves them in `images/` subdirectories
169
+ - **Macro Support**: Transforms Confluence macros (e.g., `list-children` fetches child pages, `code` blocks become fenced code, panels become blockquotes)
170
+ - **User Link Resolution**: Converts Confluence user links to `@displayName` format using API calls
171
+ - **Cleanup and Formatting**: Removes HTML tags, entities, and malformed Markdown patterns; formats output with Prettier
172
+ - **Links Structure**: Creates a `links/` folder with symlinks to all MD files and a `_links.md` file showing a hierarchical tree
173
+ - **Resume Capability**: Skips existing Markdown files to allow incremental runs
174
+ - **Error Handling**: Non-fatal errors (e.g., failed image downloads) are logged as warnings
175
+
176
+ ### Options
177
+ - `--clear`: Remove existing MD files and images folders before transforming (useful for re-processing)
178
+ - `--limit <number>`: Process only the first N HTML files
179
+
180
+ ### Output
181
+ - Markdown files with YAML front matter (title, ID, URL, version, parentId)
182
+ - Downloaded images in `images/` subdirectories per page
183
+ - `links/` folder with symlinks and hierarchical index (`_links.md`)
184
+
185
+ ## Output Structure
186
+
187
+ ### Hierarchical Structure (when `_tree.yaml` exists)
188
+ ```
189
+ outputDir/
190
+ ├── _index.yaml # Page index (YAML array)
191
+ ├── _queue.yaml # Download queue (YAML array)
192
+ ├── _tree.yaml # Hierarchical page tree structure
193
+ └── MYSPACE/ # Root folder (space key)
194
+ ├── 123456-page-title.html
195
+ ├── 123456-page-title.md
196
+ └── 123456-page-title/ # Folder for children
197
+ ├── images/ # Images for child pages
198
+ │ └── logo.png
199
+ ├── 789012-child-page.html
200
+ ├── 789012-child-page.md
201
+ └── 789012-child-page/ # Nested children
202
+ ├── 345678-grandchild.html
203
+ └── 345678-grandchild.md
204
+ ```
205
+
206
+ ### Flat Structure (fallback when only `_queue.yaml` exists)
207
+ ```
208
+ outputDir/
209
+ ├── _index.yaml # Page index (YAML array)
210
+ ├── _queue.yaml # Download queue (YAML array)
211
+ ├── page-title-1.md # Formatted markdown
212
+ ├── page-title-1.html # Original HTML (formatted)
213
+ ├── page-title-2.md
214
+ ├── page-title-2.html
215
+ └── images/ # Shared images folder
216
+ ├── image-1.png
217
+ └── image-2.jpg
218
+ ```
219
+
220
+ ## Front Matter Format
221
+
222
+ ```yaml
223
+ ---
224
+ title: "Page Title"
225
+ id: "123456789"
226
+ url: "https://mysite.atlassian.net/pages/viewpage.action?pageId=123456789"
227
+ version: 5
228
+ parentId: "987654321"
229
+ ---
230
+ ```
231
+
232
+ ## Prettier Formatting
233
+
234
+ **Markdown:**
235
+ - `printWidth: 120`
236
+ - `proseWrap: 'preserve'` (don't reflow text)
237
+ - `tabWidth: 2`
238
+
239
+ **HTML:**
240
+ - `printWidth: 120`
241
+ - `htmlWhitespaceSensitivity: 'ignore'`
242
+ - Consistent 2-space indentation
243
+
244
+ Formatting failures are non-fatal: if Prettier fails, the file is saved unformatted and a warning is logged.
245
+
246
+ ## Project Structure
247
+
248
+ ```
249
+ src/
250
+ ├── index.ts # CLI entry point (arg parsing, config validation)
251
+ ├── types.ts # TypeScript type definitions
252
+ ├── api.ts # Confluence REST API client
253
+ ├── logger.ts # Logging helpers
253
+ ├── utils.ts # Shared utility functions
255
+ └── commands/ # Command handlers (modular architecture)
256
+ ├── types.ts # Command-related type definitions
257
+ ├── help.command.ts # Help command handler
258
+ ├── index.command.ts # Index command handler
259
+ ├── update.command.ts # Update command handler
260
+ ├── plan.command.ts # Plan command handler
261
+ ├── download.command.ts # Download command handler (HTML only)
262
+ ├── transform.command.ts # Transform command handler (HTML → MD)
263
+ ├── registry.ts # Command registry (maps commands to handlers)
264
+ ├── executor.ts # Command executor (orchestrates execution)
265
+ └── index.ts # Exports for easy importing
266
+ ```
267
+
268
+ ## User Link Resolution
269
+
270
+ The exporter automatically resolves Confluence user links to display names:
271
+
272
+ ```html
273
+ <!-- Confluence HTML -->
274
+ <ac:link><ri:user ri:username="john.doe"/></ac:link>
275
+ <ac:link><ri:user ri:userkey="ff8080817b0a1234"/></ac:link>
276
+ ```
277
+
278
+ Becomes:
279
+
280
+ ```markdown
281
+ @John Doe
282
+ @John Doe
283
+ ```
284
+
285
+ Features:
286
+ - ✓ Resolves by username or userkey
287
+ - ✓ Caches user lookups to minimize API calls
288
+ - ✓ Falls back to username if API fails
289
+ - ✓ Handles unknown users gracefully
290
+
291
+ ## Development
292
+
293
+ ### Build & Run
294
+
295
+ ```bash
296
+ # Build TypeScript
297
+ npm run build # Uses Vite
298
+ npm run build:tsc # Uses tsc directly
299
+
300
+ # Run compiled
301
+ npm start -- [args]
302
+
303
+ # Development mode
304
+ npm run dev -- [args] # Run once
305
+ npm run dev:watch -- [args] # Watch mode
306
+ ```
307
+
308
+ ### Testing
309
+
310
+ ```bash
311
+ npm test # Run all tests
312
+ npm run test:watch # Watch mode
313
+ npm run test:coverage # With coverage
314
+ ```
315
+
316
+ ### Linting & Type Checking
317
+
318
+ ```bash
319
+ npm run lint # ESLint
320
+ npm run typecheck # TypeScript --noEmit
321
+ ```
322
+
323
+ ### Cleaning
324
+
325
+ ```bash
326
+ npm run clean # Remove dist/
327
+ npm run rebuild # Clean + build
328
+ ```
329
+
330
+ ## License
331
+
332
+ See the `LICENSE` file included in this package.