confluence-exporter 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (91) hide show
  1. package/.eslintrc.cjs +18 -0
  2. package/.github/copilot-instructions.md +3 -0
  3. package/.github/prompts/analyze.prompt.md +101 -0
  4. package/.github/prompts/clarify.prompt.md +158 -0
  5. package/.github/prompts/constitution.prompt.md +73 -0
  6. package/.github/prompts/implement.prompt.md +56 -0
  7. package/.github/prompts/plan.prompt.md +50 -0
  8. package/.github/prompts/specify.prompt.md +21 -0
  9. package/.github/prompts/tasks.prompt.md +69 -0
  10. package/LICENSE +21 -0
  11. package/README.md +332 -0
  12. package/agents.md +1174 -0
  13. package/dist/api.d.ts +73 -0
  14. package/dist/api.js +387 -0
  15. package/dist/api.js.map +1 -0
  16. package/dist/commands/download.command.d.ts +18 -0
  17. package/dist/commands/download.command.js +257 -0
  18. package/dist/commands/download.command.js.map +1 -0
  19. package/dist/commands/executor.d.ts +22 -0
  20. package/dist/commands/executor.js +52 -0
  21. package/dist/commands/executor.js.map +1 -0
  22. package/dist/commands/help.command.d.ts +8 -0
  23. package/dist/commands/help.command.js +68 -0
  24. package/dist/commands/help.command.js.map +1 -0
  25. package/dist/commands/index.command.d.ts +14 -0
  26. package/dist/commands/index.command.js +95 -0
  27. package/dist/commands/index.command.js.map +1 -0
  28. package/dist/commands/index.d.ts +13 -0
  29. package/dist/commands/index.js +13 -0
  30. package/dist/commands/index.js.map +1 -0
  31. package/dist/commands/plan.command.d.ts +54 -0
  32. package/dist/commands/plan.command.js +272 -0
  33. package/dist/commands/plan.command.js.map +1 -0
  34. package/dist/commands/registry.d.ts +12 -0
  35. package/dist/commands/registry.js +32 -0
  36. package/dist/commands/registry.js.map +1 -0
  37. package/dist/commands/transform.command.d.ts +69 -0
  38. package/dist/commands/transform.command.js +951 -0
  39. package/dist/commands/transform.command.js.map +1 -0
  40. package/dist/commands/types.d.ts +12 -0
  41. package/dist/commands/types.js +5 -0
  42. package/dist/commands/types.js.map +1 -0
  43. package/dist/commands/update.command.d.ts +10 -0
  44. package/dist/commands/update.command.js +201 -0
  45. package/dist/commands/update.command.js.map +1 -0
  46. package/dist/constants.d.ts +1 -0
  47. package/dist/constants.js +2 -0
  48. package/dist/constants.js.map +1 -0
  49. package/dist/index.d.ts +5 -0
  50. package/dist/index.js +110 -0
  51. package/dist/index.js.map +1 -0
  52. package/dist/logger.d.ts +15 -0
  53. package/dist/logger.js +52 -0
  54. package/dist/logger.js.map +1 -0
  55. package/dist/types.d.ts +167 -0
  56. package/dist/types.js +5 -0
  57. package/dist/types.js.map +1 -0
  58. package/dist/utils.d.ts +56 -0
  59. package/dist/utils.js +178 -0
  60. package/dist/utils.js.map +1 -0
  61. package/eslint.config.js +29 -0
  62. package/jest.config.cjs +25 -0
  63. package/migrate-meta.js +132 -0
  64. package/package.json +53 -0
  65. package/src/api.ts +469 -0
  66. package/src/commands/download.command.ts +324 -0
  67. package/src/commands/executor.ts +62 -0
  68. package/src/commands/help.command.ts +72 -0
  69. package/src/commands/index.command.ts +111 -0
  70. package/src/commands/index.ts +14 -0
  71. package/src/commands/plan.command.ts +318 -0
  72. package/src/commands/registry.ts +39 -0
  73. package/src/commands/transform.command.ts +1103 -0
  74. package/src/commands/types.ts +16 -0
  75. package/src/commands/update.command.ts +229 -0
  76. package/src/constants.ts +0 -0
  77. package/src/index.ts +120 -0
  78. package/src/logger.ts +60 -0
  79. package/src/test.sh +66 -0
  80. package/src/types.ts +176 -0
  81. package/src/utils.ts +204 -0
  82. package/tests/commands/README.md +123 -0
  83. package/tests/commands/download.command.test.ts +8 -0
  84. package/tests/commands/help.command.test.ts +8 -0
  85. package/tests/commands/index.command.test.ts +8 -0
  86. package/tests/commands/plan.command.test.ts +15 -0
  87. package/tests/commands/transform.command.test.ts +8 -0
  88. package/tests/fixtures/_index.yaml +38 -0
  89. package/tests/fixtures/mock-pages.ts +62 -0
  90. package/tsconfig.json +25 -0
  91. package/vite.config.ts +45 -0
@@ -0,0 +1,324 @@
1
+ /**
2
+ * Download command handler - Downloads HTML pages from _queue.yaml
3
+ */
4
+
5
+ import type { CommandHandler, CommandContext } from './types.js';
6
+ import { ConfluenceApi } from '../api.js';
7
+ import type { PageTreeNode, PageIndexEntry, ConfluenceConfig } from '../types.js';
8
+ import path, { join } from 'path';
9
+ import { mkdirSync, writeFileSync, readFileSync, existsSync } from 'fs';
10
+ import { parse, stringify } from 'yaml';
11
+ import { format } from 'prettier';
12
+ import { updateIndexEntry, readIndexEntry } from '../utils.js';
13
+
14
/**
 * Download command handler.
 *
 * Two modes, selected by `config.pageId`:
 *  - single-page mode: fetch one page and write its HTML file;
 *  - queue mode: read `_queue.yaml` and `_tree.yaml` from the output directory
 *    and download every queued page into a folder hierarchy derived from the tree.
 *
 * In both modes a page is skipped when `_index.yaml` records a
 * `downloadedVersion` equal to the page's current version.
 */
export class DownloadCommand implements CommandHandler {
  name = 'download';
  description = 'Download HTML pages from Confluence';

  constructor(private config: ConfluenceConfig) { }

  /**
   * Run the download.
   *
   * @param _context Unused; all settings are taken from the config given to the constructor.
   * @throws Error when queue mode is requested but `_tree.yaml` or `_queue.yaml` is missing.
   */
  async execute(_context: CommandContext): Promise<void> {
    const api = new ConfluenceApi(this.config);

    // Single page mode
    if (this.config.pageId) {
      console.log(`\n📄 Downloading single page: ${this.config.pageId}\n`);

      // Check if page needs downloading
      const indexPath = join(this.config.outputDir, '_index.yaml');
      const indexEntry = readIndexEntry(indexPath, this.config.pageId);

      if (indexEntry && indexEntry.downloadedVersion !== undefined && indexEntry.downloadedAt) {
        // Compare the version recorded in the index with the version last downloaded
        const currentVersion = indexEntry.version ?? 0;
        const downloadedVersion = indexEntry.downloadedVersion;

        if (currentVersion === downloadedVersion) {
          console.log(`⏭️ Page ${this.config.pageId} is up-to-date (v${downloadedVersion}), skipping download`);
          console.log('\n✅ Download complete!\n');
          return;
        }
        console.log(`📥 Updating page ${this.config.pageId} from v${downloadedVersion} to v${currentVersion}`);
      } else {
        console.log(`📥 Downloading new page ${this.config.pageId}`);
      }

      await this.downloadPage(api, this.config.pageId);
      console.log('\n✅ Download complete!\n');
      return;
    }

    // Queue mode - BOTH files are required: the queue lists what to download,
    // the tree decides where each page is stored.
    const treeFile = join(this.config.outputDir, '_tree.yaml');
    const queueFile = join(this.config.outputDir, '_queue.yaml');

    console.log(`\n🔍 Checking for tree file: ${treeFile}`);
    console.log(`🔍 Checking for queue file: ${queueFile}\n`);

    if (!existsSync(treeFile) || !existsSync(queueFile)) {
      throw new Error(
        `❌ Tree or queue file not found. Run 'plan' command first to create the tree and queue.`
      );
    }

    // Read queue. An empty YAML document parses to null, so normalise to an array.
    const parsedQueue: unknown = parse(readFileSync(queueFile, 'utf-8'));
    const queue = Array.isArray(parsedQueue) ? (parsedQueue as PageIndexEntry[]) : [];

    console.log(`📊 Queue contains ${queue.length} pages\n`);

    // Apply limit if specified
    const pagesToProcess = this.config.limit ? queue.slice(0, this.config.limit) : queue;

    await this.downloadFromQueueWithHierarchy(api, this.config, pagesToProcess);
  }

  /**
   * Download the queued pages in batches, placing each page in the directory
   * that mirrors its position in `_tree.yaml`.
   *
   * @param api    Confluence API client used to fetch page bodies.
   * @param config Export configuration (output directory, space key, parallelism).
   * @param queue  Pages to download, in processing order. May be empty.
   */
  private async downloadFromQueueWithHierarchy(
    api: ConfluenceApi,
    config: ConfluenceConfig,
    queue: PageIndexEntry[]
  ): Promise<void> {
    const treeFile = join(config.outputDir, '_tree.yaml');
    const parsedTree: unknown = parse(readFileSync(treeFile, 'utf-8'));
    const tree = Array.isArray(parsedTree) ? (parsedTree as PageTreeNode[]) : [];

    // Create root folder for space
    const rootDir = join(config.outputDir, config.spaceKey);
    mkdirSync(rootDir, { recursive: true });

    // Build a map of pageId -> directory the page's HTML file is written into
    const pagePathMap = new Map<string, string>();
    for (const node of tree) {
      this.collectPagePaths(node, rootDir, pagePathMap);
    }

    // Display first page path as tree (nothing to show for an empty queue)
    const firstEntry = queue[0];
    const firstPagePath = firstEntry ? pagePathMap.get(firstEntry.id) : undefined;
    if (firstPagePath) {
      this.displayPathAsTree(path.dirname(firstPagePath), rootDir);
    }

    // A non-positive or non-numeric batch size would stall the loop below,
    // so anything that is not a number >= 1 falls back to the default of 5.
    const requestedParallel = Number(config.parallel);
    const batchSize =
      Number.isFinite(requestedParallel) && requestedParallel >= 1 ? Math.floor(requestedParallel) : 5;

    // Download pages from queue using the path map in batches
    for (let batchStart = 0; batchStart < queue.length; batchStart += batchSize) {
      const batchEnd = Math.min(batchStart + batchSize, queue.length);
      const batch = queue.slice(batchStart, batchEnd);

      console.log(`\n📦 Processing batch ${Math.floor(batchStart / batchSize) + 1}/${Math.ceil(queue.length / batchSize)} (${batch.length} pages)`);

      await this.downloadBatch(api, config, batch, pagePathMap, rootDir);
    }

    console.log('\n✅ Download complete!\n');
  }

  /**
   * Record the target directory for `node` and, recursively, for its descendants.
   * Children of a page live in a sub-directory named `<id>-<slug>` after the parent.
   */
  private collectPagePaths(
    node: PageTreeNode,
    currentPath: string,
    pagePathMap: Map<string, string>
  ): void {
    pagePathMap.set(node.id, currentPath);

    if (node.children && node.children.length > 0) {
      const childDir = join(currentPath, `${node.id}-${this.slugify(node.title)}`);
      for (const child of node.children) {
        this.collectPagePaths(child, childDir, pagePathMap);
      }
    }
  }

  /**
   * Download one batch of pages concurrently, write them to disk, and persist
   * the download metadata for the batch to `_index.yaml` in a single write.
   *
   * An index entry is only marked as downloaded after its HTML file has been
   * written successfully, so a failed fetch or save is retried on the next run.
   *
   * @param api         Confluence API client.
   * @param config      Export configuration.
   * @param batch       Queue entries belonging to this batch.
   * @param pagePathMap Page id -> target directory; unknown ids fall back to `rootDir`.
   * @param rootDir     Root directory of the exported space.
   */
  private async downloadBatch(
    api: ConfluenceApi,
    config: ConfluenceConfig,
    batch: PageIndexEntry[],
    pagePathMap: Map<string, string>,
    rootDir: string
  ): Promise<void> {
    const indexPath = join(config.outputDir, '_index.yaml');

    // Read index once for the entire batch (an empty document parses to null)
    const parsedIndex: unknown = parse(readFileSync(indexPath, 'utf-8'));
    const allIndexEntries = Array.isArray(parsedIndex) ? (parsedIndex as PageIndexEntry[]) : [];
    const indexMap = new Map(allIndexEntries.map(entry => [entry.id, entry]));

    // Determine which pages need downloading
    const pagesToDownload: Array<{ entry: PageIndexEntry; pagePath: string; skipReason: string }> = [];
    let skippedCount = 0;

    for (const entry of batch) {
      const pagePath = pagePathMap.get(entry.id) || rootDir;
      const indexEntry = indexMap.get(entry.id);

      let skip = false;
      let skipReason = '';

      if (indexEntry && indexEntry.downloadedVersion !== undefined && indexEntry.downloadedAt) {
        // Page has been downloaded before
        const currentVersion = entry.version ?? 0;
        const downloadedVersion = indexEntry.downloadedVersion;

        if (currentVersion === downloadedVersion) {
          skip = true;
          skipReason = `(⏭️ skipped - up-to-date v${downloadedVersion})`;
        } else {
          skipReason = `(📥 updating v${downloadedVersion} → v${currentVersion})`;
        }
      } else {
        skipReason = `(📥 new download)`;
      }

      // Indent the log line by the page's depth below the space root
      const filename = `${entry.id}-${this.slugify(entry.title)}.html`;
      const deep = Math.max(0, pagePath.split(path.sep).length - rootDir.split(path.sep).length - 1);
      console.log(`${' '.repeat(deep)}/${filename} ${skipReason}`);

      if (skip) {
        skippedCount++;
      } else {
        pagesToDownload.push({ entry, pagePath, skipReason });
      }
    }

    let savedCount = 0;
    let indexChanged = false;

    // Download pages in parallel
    if (pagesToDownload.length > 0) {
      console.log(`\n📥 Downloading ${pagesToDownload.length} pages in parallel...`);

      // Each task handles its own failure so one bad page cannot reject the whole batch
      const results = await Promise.all(
        pagesToDownload.map(async ({ entry, pagePath }) => {
          try {
            // Create directory if it doesn't exist
            mkdirSync(pagePath, { recursive: true });
            const page = await api.getPage(entry.id);
            return { entry, page, pagePath };
          } catch (error) {
            console.error(`❌ Failed to download page ${entry.id}:`, error);
            return { entry, page: null, pagePath };
          }
        })
      );

      // Save successful downloads, then (and only then) record them in the index
      for (const { entry, page, pagePath } of results) {
        if (!page) {
          continue;
        }

        try {
          await this.savePageToFile(page, pagePath);
        } catch (error) {
          console.error(`❌ Failed to save page ${entry.id}:`, error);
          continue;
        }

        savedCount++;
        const indexEntry = indexMap.get(entry.id);
        if (indexEntry) {
          indexEntry.downloadedVersion = page.version ?? 0;
          indexEntry.downloadedAt = new Date().toISOString();
          indexChanged = true;
        }
      }
    }

    // Write updated index back to file once for the entire batch
    if (indexChanged) {
      const yamlContent = stringify(allIndexEntries, {
        indent: 2,
        lineWidth: 0
      });
      writeFileSync(indexPath, yamlContent, 'utf-8');
      console.log(`💾 Updated index with ${savedCount} downloaded pages`);
    }

    const failedCount = pagesToDownload.length - savedCount;
    const failedSuffix = failedCount > 0 ? `, ${failedCount} failed` : '';
    console.log(`✅ Batch complete: ${savedCount} downloaded, ${skippedCount} skipped${failedSuffix}`);
  }

  /**
   * Format HTML with Prettier; on any formatter error, warn and return the
   * input unchanged so the page is still saved.
   *
   * @param html  Raw page HTML.
   * @param title Page title, used only in the warning message.
   */
  private async formatHtml(html: string, title: string): Promise<string> {
    try {
      return await format(html, {
        parser: 'html',
        printWidth: 120,
        htmlWhitespaceSensitivity: 'ignore',
        tabWidth: 2,
      });
    } catch {
      console.warn(`⚠️ Failed to format HTML for ${title}, saving unformatted`);
      return html;
    }
  }

  /**
   * Write a page as `<id>-<slug>.html` into the directory `filePath`.
   *
   * @param page     Page data; `body` is the HTML to store.
   * @param filePath Existing directory to write into.
   */
  private async savePageToFile(
    page: { id: string; title: string; body: string; version?: number },
    filePath: string
  ): Promise<void> {
    const filename = `${page.id}-${this.slugify(page.title)}.html`;
    const fullPath = join(filePath, filename);

    const formattedHtml = await this.formatHtml(page.body, page.title);
    writeFileSync(fullPath, formattedHtml, 'utf-8');
  }

  /**
   * Print each directory segment between `rootDir` and `fullPath` on its own
   * line, indented by its depth.
   */
  private displayPathAsTree(fullPath: string, rootDir: string): void {
    const segments = path
      .relative(rootDir, fullPath)
      .split(path.sep)
      .filter(segment => segment);

    segments.forEach((segment, depth) => {
      console.log(`${' '.repeat(depth)}/${segment}`);
    });
  }

  /**
   * Fetch a single page, write it as `<pageId>-<slug>.html`, and record the
   * download in `_index.yaml`.
   *
   * @param api       Confluence API client.
   * @param pageId    Id of the page to fetch.
   * @param outputDir Directory to write into; defaults to the current working directory.
   */
  private async downloadPage(
    api: ConfluenceApi,
    pageId: string,
    outputDir?: string
  ): Promise<void> {
    const page = await api.getPage(pageId);
    const filename = `${pageId}-${this.slugify(page.title)}.html`;

    // NOTE(review): single-page mode calls this without `outputDir`, so the file
    // lands in the working directory while the index lives in `config.outputDir`.
    // Confirm whether that is intended before changing the default.
    const dir = outputDir || process.cwd();
    mkdirSync(dir, { recursive: true });
    const filepath = join(dir, filename);

    const formattedHtml = await this.formatHtml(page.body, page.title);
    writeFileSync(filepath, formattedHtml, 'utf-8');

    // Update _index.yaml with download metadata
    const indexPath = join(this.config.outputDir, '_index.yaml');
    const success = updateIndexEntry(indexPath, page.id, {
      downloadedVersion: page.version ?? 0,
      downloadedAt: new Date().toISOString()
    });

    if (!success) {
      console.warn(`⚠️ Failed to update index for page ${page.title}`);
    }
  }

  /**
   * Turn a page title into a file-name fragment: lower-case, characters other
   * than word characters, whitespace and hyphens removed, whitespace runs
   * replaced by a single hyphen, at most 50 characters.
   *
   * The output format is part of the on-disk naming scheme and is therefore
   * kept exactly as is. Note that `trim()` runs after whitespace has already
   * become hyphens, so leading/trailing hyphens are not removed.
   */
  private slugify(text: string): string {
    return text
      .toLowerCase()
      .replace(/[^\w\s-]/g, '')
      .replace(/\s+/g, '-')
      .replace(/-+/g, '-')
      .trim()
      .substring(0, 50); // Limit length for filesystem
  }
}
@@ -0,0 +1,62 @@
1
+ /**
2
+ * Command executor - orchestrates command execution
3
+ */
4
+
5
+ import { ConfluenceConfig } from 'src/types.js';
6
+ import { CommandRegistry } from './registry.js';
7
+ import type { Command, CommandContext } from './types.js';
8
+
9
/**
 * Command executor.
 *
 * Validates raw command names against a {@link CommandRegistry} and runs the
 * matching handlers sequentially.
 */
export class CommandExecutor {
  private registry: CommandRegistry;

  constructor(private config: ConfluenceConfig) {
    this.registry = new CommandRegistry(config);
  }

  /**
   * Lower-case every given name and check it against the registry.
   *
   * @param commands Raw command names.
   * @returns The lower-cased names, in input order.
   * @throws Error on the first name the registry does not recognise; the
   *         message quotes the name as it was originally given.
   */
  validateCommands(commands: string[]): Command[] {
    const accepted: Command[] = [];

    commands.forEach((raw) => {
      const normalized = raw.toLowerCase();
      if (!this.registry.isValidCommand(normalized)) {
        throw new Error(`Unknown command: "${raw}"`);
      }
      accepted.push(normalized);
    });

    return accepted;
  }

  /**
   * Run the commands one after another, awaiting each handler before starting
   * the next. A horizontal rule is printed between consecutive commands.
   *
   * @param commands Commands to run, in order.
   * @param context  Context object handed to every handler.
   * @throws Error when the registry has no handler for a command; errors
   *         thrown by a handler propagate and stop the sequence.
   */
  async executeCommands(commands: Command[], context: CommandContext): Promise<void> {
    const separator = `\n${'─'.repeat(60)}\n`;

    for (const [position, command] of commands.entries()) {
      // Separator goes between commands, never before the first one
      if (position !== 0) {
        console.log(separator);
      }

      const handler = this.registry.getHandler(command);
      if (!handler) {
        throw new Error(`No handler found for command: ${command}`);
      }

      await handler.execute(context);
    }

    console.log('\n✓ All commands completed successfully!');
  }

  /**
   * List the commands the registry accepts.
   */
  getValidCommands(): Command[] {
    return this.registry.getValidCommands();
  }
}
@@ -0,0 +1,72 @@
1
+ /**
2
+ * Help command handler
3
+ */
4
+
5
+ import { ConfluenceConfig } from 'src/types.js';
6
+ import type { CommandContext, CommandHandler } from './types.js';
7
+
8
/**
 * Help command handler - prints the CLI usage text to stdout.
 */
export class HelpCommand implements CommandHandler {
  /**
   * The help text, one entry per `console.log` call. A trailing `\n` inside an
   * entry produces a blank line after it.
   *
   * NOTE(review): the text states a `--pageSize` default of 25; confirm this
   * matches the default actually applied by the option parser.
   */
  private static readonly HELP_LINES: readonly string[] = [
    'Minimal Confluence to Markdown Exporter\n',
    'Usage: node index.js [command] [options]\n',
    'If no command is provided, runs full sync: update index (or create if not exists), plan, download, and transform.\n',
    'Commands:',
    ' help Show this help message',
    ' index Create _index.yaml with page metadata',
    ' update Check for new/updated pages and update _index.yaml',
    ' plan Create _queue.yaml for download (from index or specific page tree)',
    ' download Download HTML pages from _queue.yaml',
    ' transform Transform HTML files to Markdown (skips existing MD files, creates links structure)',
    ' index plan download transform Run all commands in sequence\n',
    'Options:',
    ' -u, --url <url> Confluence base URL',
    ' -n, --username <email> Confluence username/email',
    ' -p, --password <token> Confluence API token',
    ' -s, --space <key> Confluence space key',
    ' -i, --pageId <id> Download specific page ID only (optional)',
    ' -o, --output <dir> Output directory (default: ./output)',
    ' -l, --limit <number> Limit number of pages to process (optional)',
    ' --parallel <number> Number of concurrent operations (default: 5)',
    ' -f, --force Force re-download of all pages (skip version check)',
    ' --clear Clear existing MD files and images before transforming',
    ' --pageSize <number> Items per API page (default: 25)',
    ' -h, --help Show this help message\n',
    'Environment Variables:',
    ' CONFLUENCE_BASE_URL',
    ' CONFLUENCE_USERNAME',
    ' CONFLUENCE_PASSWORD',
    ' CONFLUENCE_SPACE_KEY',
    ' OUTPUT_DIR\n',
    'Examples:',
    ' # Full workflow: index, plan, download, and transform',
    ' node index.js index plan download transform -u https://mysite.atlassian.net -n user@example.com -p token -s MYSPACE\n',
    ' # Full workflow with limit (process first 10 pages only)',
    ' node index.js index plan download transform -u https://mysite.atlassian.net -n user@example.com -p token -s MYSPACE -l 10\n',
    ' # Create index only (Phase 1)',
    ' node index.js index -u https://mysite.atlassian.net -n user@example.com -p token -s MYSPACE\n',
    ' # Check for new/updated pages and update existing index',
    ' node index.js update -u https://mysite.atlassian.net -n user@example.com -p token -s MYSPACE\n',
    ' # Create download queue from existing index (Phase 2)',
    ' node index.js plan -u https://mysite.atlassian.net -n user@example.com -p token -s MYSPACE\n',
    ' # Create download queue for specific page and all children',
    ' node index.js plan -i 123456789 -u https://mysite.atlassian.net -n user@example.com -p token -s MYSPACE\n',
    ' # Force re-download all pages (ignore version check)',
    ' node index.js plan --force -u https://mysite.atlassian.net -n user@example.com -p token -s MYSPACE\n',
    ' # Download HTML pages from existing queue (Phase 3)',
    ' node index.js download -u https://mysite.atlassian.net -n user@example.com -p token -s MYSPACE\n',
    ' # Transform HTML to Markdown (Phase 4)',
    ' node index.js transform -u https://mysite.atlassian.net -n user@example.com -p token -s MYSPACE\n',
    ' # Transform HTML to Markdown with clear (remove existing MD files first)',
    ' node index.js transform --clear -u https://mysite.atlassian.net -n user@example.com -p token -s MYSPACE\n',
    ' # Download and transform together',
    ' node index.js download transform -u https://mysite.atlassian.net -n user@example.com -p token -s MYSPACE\n',
    ' # Download and transform with higher concurrency',
    ' node index.js download transform --parallel 10 -u https://mysite.atlassian.net -n user@example.com -p token -s MYSPACE\n',
    ' # Download single page HTML only (no index/plan needed)',
    ' node index.js download -i 123456789 -u https://mysite.atlassian.net -n user@example.com -p token -s MYSPACE',
  ];

  /**
   * Print the help text.
   *
   * @param _context Unused.
   */
  async execute(_context: CommandContext): Promise<void> {
    this.showHelp();
  }

  /** Emit every help line through `console.log`, in order. */
  private showHelp(): void {
    for (const line of HelpCommand.HELP_LINES) {
      console.log(line);
    }
  }
}
@@ -0,0 +1,111 @@
1
+ /**
2
+ * Index command handler - Creates _index.yaml with all page metadata
3
+ */
4
+
5
+ import { promises as fs } from 'fs';
6
+ import path from 'path';
7
+ import yaml from 'yaml';
8
+ import { ConfluenceApi } from '../api.js';
9
+ import type { ConfluenceConfig, PageIndexEntry } from '../types.js';
10
+ import type { CommandContext, CommandHandler } from './types.js';
11
+
12
/**
 * Index command handler - creates (or resumes) `_index.yaml`, a YAML list with
 * one metadata entry per page of the configured space.
 */
export class IndexCommand implements CommandHandler {
  constructor(private config: ConfluenceConfig) {}

  /**
   * Ensure the output directory exists and build the index.
   *
   * @param _context Unused; all settings are taken from the config given to the constructor.
   */
  async execute(_context: CommandContext): Promise<void> {
    const api = new ConfluenceApi(this.config);

    // Create output directory if it doesn't exist
    await fs.mkdir(this.config.outputDir, { recursive: true });

    console.log(`Starting indexing of space: ${this.config.spaceKey}`);
    console.log(`Output directory: ${this.config.outputDir}\n`);

    // Phase 1: Create _index.yaml
    console.log('Phase 1: Creating _index.yaml...');
    await this.createIndex(api, this.config);

    console.log(`\nIndexing complete!`);
    console.log(`Index saved to: ${this.config.outputDir}/_index.yaml`);
  }

  /**
   * Create `_index.yaml`, or continue an existing one.
   *
   * When the file already exists and parses as a YAML list, indexing resumes
   * after the entries it contains, reusing the page size stored in its
   * `# Page Size:` header comment. When the file is missing or unreadable, a
   * fresh file containing only the header is written first.
   *
   * Entries are appended one at a time, so an interrupted run leaves a usable
   * partial index behind.
   *
   * @param api    Confluence API client that yields the pages of the space.
   * @param config Export configuration (`outputDir`, `spaceKey`, `pageSize`, `limit`).
   */
  private async createIndex(api: ConfluenceApi, config: CommandContext['config']): Promise<void> {
    const indexPath = path.join(config.outputDir, '_index.yaml');

    let pageCount = 0;
    let pageSize = config.pageSize || 100;
    let startFrom = 0;

    // Check if _index.yaml already exists (resume functionality)
    try {
      const existingContent = await fs.readFile(indexPath, 'utf-8');

      // Reuse the page size recorded in the header comment; ignore a stored
      // value of 0, which cannot be a valid page size.
      const pageSizeMatch = existingContent.match(/# Page Size: (\d+)/);
      if (pageSizeMatch) {
        const storedPageSize = parseInt(pageSizeMatch[1], 10);
        if (storedPageSize > 0) {
          pageSize = storedPageSize;
          console.log(`Using existing page size from index: ${pageSize}`);
        }
      }

      const existingPages = yaml.parse(existingContent) as PageIndexEntry[];

      if (existingPages && Array.isArray(existingPages)) {
        pageCount = existingPages.length;
        // Calculate the start position for the API
        startFrom = pageCount;
        console.log(`Found existing index with ${pageCount} pages. Resuming from position ${startFrom + 1}...\n`);
      }
    } catch (_error) {
      // File doesn't exist or is invalid, start fresh
      const header = [
        '# Confluence Export Index',
        `# Space: ${config.spaceKey}`,
        `# Export Date: ${new Date().toISOString()}`,
        `# Page Size: ${pageSize}`,
        '',
        '',
      ].join('\n');
      await fs.writeFile(indexPath, header, 'utf-8');
      console.log(`Creating new index with page size: ${pageSize}...\n`);
    }

    if (config.limit && pageCount >= config.limit) {
      // A resumed index may already satisfy the limit; do not fetch and append
      // one more page before noticing.
      console.log(`\n⚠ Limit reached: ${config.limit} pages indexed`);
    } else {
      // Fetch pages starting from where we left off
      for await (const page of api.getAllPages(config.spaceKey, pageSize, startFrom)) {
        pageCount++;
        console.log(`[${pageCount}] Indexed: ${page.title} (${page.id}) [API Page ${page.apiPageNumber}]`);

        // Create page entry
        const pageEntry: PageIndexEntry = {
          id: page.id,
          title: page.title,
          version: page.version,
          parentId: page.parentId,
          modifiedDate: page.modifiedDate,
          indexedDate: new Date().toISOString(),
          pageNumber: page.apiPageNumber
        };

        // Serialising a one-element list makes the YAML library emit the
        // leading "- " and the matching indentation of the remaining keys,
        // including for multi-line values. The result ends with a newline.
        await fs.appendFile(indexPath, yaml.stringify([pageEntry]), 'utf-8');

        // Check if limit is reached
        if (config.limit && pageCount >= config.limit) {
          console.log(`\n⚠ Limit reached: ${config.limit} pages indexed`);
          break;
        }
      }
    }

    console.log(`\n✓ Index created: ${indexPath}`);
    console.log(` Total pages indexed: ${pageCount}`);
  }
}
@@ -0,0 +1,14 @@
1
+ /**
2
+ * Commands module exports
3
+ * Re-exports all command-related classes and types for easier importing
4
+ */
5
+
6
+ export { HelpCommand } from './help.command.js';
7
+ export { IndexCommand } from './index.command.js';
8
+ export { UpdateCommand } from './update.command.js';
9
+ export { PlanCommand } from './plan.command.js';
10
+ export { DownloadCommand } from './download.command.js';
11
+ export { TransformCommand } from './transform.command.js';
12
+ export { CommandRegistry } from './registry.js';
13
+ export { CommandExecutor } from './executor.js';
14
+ export type { Command, CommandContext, CommandHandler } from './types.js';