npm - confluence-cli - Versions diffs - 1.10.0 → 1.10.1 - Mend

confluence-cli 1.10.0 → 1.10.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (4)

package/CHANGELOG.md +7 -0
package/bin/confluence.js +4 -2
package/lib/confluence-client.js +300 -12
package/package.json +1 -1

package/CHANGELOG.md CHANGED Viewed

@@ -1,3 +1,10 @@
+## [1.10.1](https://github.com/pchuri/confluence-cli/compare/v1.10.0...v1.10.1) (2025-12-08)
+### Bug Fixes
+* improve markdown export and attachment download ([#19](https://github.com/pchuri/confluence-cli/issues/19)) ([978275d](https://github.com/pchuri/confluence-cli/commit/978275dbe71eea83138bbd537ce7d4edda8180f8))
 # [1.10.0](https://github.com/pchuri/confluence-cli/compare/v1.9.0...v1.10.0) (2025-12-05)

package/bin/confluence.js CHANGED Viewed

@@ -404,7 +404,8 @@ program
         let downloaded = 0;
         for (const attachment of filtered) {
           const targetPath = uniquePathFor(destDir, attachment.title);
-          const dataStream = await client.downloadAttachment(pageId, attachment.id);
+          // Pass the full attachment object so downloadAttachment can use downloadLink directly
+          const dataStream = await client.downloadAttachment(pageId, attachment);
           await writeStream(dataStream, targetPath);
           downloaded += 1;
           console.log(`⬇️  ${chalk.green(attachment.title)} -> ${chalk.gray(targetPath)}`);
@@ -495,7 +496,8 @@ program
           let downloaded = 0;
           for (const attachment of filtered) {
             const targetPath = uniquePathFor(attachmentsDir, attachment.title);
-            const dataStream = await client.downloadAttachment(pageId, attachment.id);
+            // Pass the full attachment object so downloadAttachment can use downloadLink directly
+            const dataStream = await client.downloadAttachment(pageId, attachment);
             await writeStream(dataStream, targetPath);
             downloaded += 1;
             console.log(`⬇️  ${chalk.green(attachment.title)} -> ${chalk.gray(targetPath)}`);

package/lib/confluence-client.js CHANGED Viewed

@@ -74,8 +74,12 @@ class ConfluenceClient {
   /**
    * Read a Confluence page content
+   * @param {string} pageIdOrUrl - Page ID or URL
+   * @param {string} format - Output format: 'text', 'html', or 'markdown'
+   * @param {object} options - Additional options
+   * @param {boolean} options.resolveUsers - Whether to resolve userkeys to display names (default: true for markdown)
    */
-  async readPage(pageIdOrUrl, format = 'text') {
+  async readPage(pageIdOrUrl, format = 'text', options = {}) {
     const pageId = this.extractPageId(pageIdOrUrl);
     const response = await this.client.get(`/content/${pageId}`, {
@@ -84,13 +88,26 @@ class ConfluenceClient {
       }
     });
-    const htmlContent = response.data.body.storage.value;
+    let htmlContent = response.data.body.storage.value;
     if (format === 'html') {
       return htmlContent;
     }
     if (format === 'markdown') {
+      // Resolve userkeys to display names before converting to markdown
+      const resolveUsers = options.resolveUsers !== false;
+      if (resolveUsers) {
+        const { html: resolvedHtml } = await this.resolveUserKeysInHtml(htmlContent);
+        htmlContent = resolvedHtml;
+      }
+      // Resolve page links to full URLs
+      const resolvePageLinks = options.resolvePageLinks !== false;
+      if (resolvePageLinks) {
+        htmlContent = await this.resolvePageLinksInHtml(htmlContent);
+      }
       return this.storageToMarkdown(htmlContent);
     }
@@ -167,6 +184,153 @@ class ConfluenceClient {
     }));
   }
+  /**
+   * Get user information by userkey
+   * @param {string} userKey - The user key (e.g., "8ad05c43962471ed0196c26107d7000c")
+   * @returns {Promise<{key: string, displayName: string, username: string}>}
+   */
+  async getUserByKey(userKey) { // Looks up one user through this.client; the whole body is inside try/catch, so it always resolves
+    try {
+      const response = await this.client.get('/user', {
+        params: { key: userKey }
+      });
+      return {
+        key: userKey,
+        displayName: response.data.displayName || response.data.username || userKey, // first non-empty of displayName, username, raw key
+        username: response.data.username || ''
+      };
+    } catch (error) {
+      // Any error thrown above (not only "user not found") lands here; fall back to the raw userkey as the display name
+      return {
+        key: userKey,
+        displayName: userKey,
+        username: ''
+      };
+    }
+  }
+  /**
+   * Resolve all userkeys in HTML to display names
+   * @param {string} html - HTML content with ri:user elements
+   * @returns {Promise<{html: string, userMap: Map<string, string>}>}
+   */
+  async resolveUserKeysInHtml(html) {
+    // Extract all unique userkeys
+    const userKeyRegex = /ri:userkey="([^"]+)"/g;
+    const userKeys = new Set();
+    let match;
+    while ((match = userKeyRegex.exec(html)) !== null) {
+      userKeys.add(match[1]);
+    }
+    if (userKeys.size === 0) {
+      return { html, userMap: new Map() };
+    }
+    // Fetch user info for all keys in parallel
+    const userPromises = Array.from(userKeys).map(key => this.getUserByKey(key));
+    const users = await Promise.all(userPromises);
+    // Build userkey -> displayName map
+    const userMap = new Map();
+    users.forEach(user => {
+      userMap.set(user.key, user.displayName);
+    });
+    // Replace userkey references with display names in HTML
+    let resolvedHtml = html;
+    userMap.forEach((displayName, userKey) => {
+      // Replace <ac:link><ri:user ri:userkey="xxx" /></ac:link> with @displayName
+      const userLinkRegex = new RegExp(
+        `<ac:link>\\s*<ri:user\\s+ri:userkey="${userKey}"\\s*/>\\s*</ac:link>`,
+        'g'
+      );
+      resolvedHtml = resolvedHtml.replace(userLinkRegex, `@${displayName}`);
+    });
+    return { html: resolvedHtml, userMap };
+  }
+  /**
+   * Find a page by title and space key, return page info with URL
+   * @param {string} spaceKey - Space key (e.g., "~huotui" or "TECH")
+   * @param {string} title - Page title
+   * @returns {Promise<{title: string, url: string} | null>}
+   */
+  async findPageByTitleAndSpace(spaceKey, title) { // Queries '/content' by space key + title; resolves to null when nothing matches or anything throws
+    try {
+      const response = await this.client.get('/content', {
+        params: {
+          spaceKey: spaceKey,
+          title: title,
+          limit: 1 // only the first result is used below
+        }
+      });
+      if (response.data.results && response.data.results.length > 0) {
+        const page = response.data.results[0];
+        const webui = page._links?.webui || ''; // missing _links or webui yields an empty url rather than an error
+        return {
+          title: page.title,
+          url: webui ? `https://${this.domain}/wiki${webui}` : '' // NOTE(review): hard-codes the https scheme and a "/wiki" prefix — confirm this holds for every deployment this client targets
+        };
+      }
+      return null;
+    } catch (error) {
+      return null; // errors are swallowed: the caller cannot tell a failed request from "not found"
+    }
+  }
+  /**
+   * Resolve all page links in HTML to full URLs
+   * @param {string} html - HTML content with ri:page elements
+   * @returns {Promise<string>} - HTML with resolved page links
+   */
+  async resolvePageLinksInHtml(html) {
+    // Extract all page links: <ri:page ri:space-key="xxx" ri:content-title="yyy" />
+    const pageLinkRegex = /<ac:link>\s*<ri:page\s+ri:space-key="([^"]+)"\s+ri:content-title="([^"]+)"[^>]*(?:\/>|><\/ri:page>)\s*<\/ac:link>/g;
+    const pageLinks = [];
+    let match;
+    while ((match = pageLinkRegex.exec(html)) !== null) {
+      pageLinks.push({
+        fullMatch: match[0],
+        spaceKey: match[1],
+        title: match[2]
+      });
+    }
+    if (pageLinks.length === 0) {
+      return html;
+    }
+    // Fetch page info for all links in parallel
+    const pagePromises = pageLinks.map(async (link) => {
+      const pageInfo = await this.findPageByTitleAndSpace(link.spaceKey, link.title);
+      return {
+        ...link,
+        pageInfo
+      };
+    });
+    const resolvedLinks = await Promise.all(pagePromises);
+    // Replace page link references with markdown links
+    let resolvedHtml = html;
+    resolvedLinks.forEach(({ fullMatch, title, pageInfo }) => {
+      let replacement;
+      if (pageInfo && pageInfo.url) {
+        replacement = `[${title}](${pageInfo.url})`;
+      } else {
+        // Fallback to just the title if page not found
+        replacement = `[${title}]`;
+      }
+      resolvedHtml = resolvedHtml.replace(fullMatch, replacement);
+    });
+    return resolvedHtml;
+  }
   /**
    * List attachments for a page with pagination support
    */
@@ -228,13 +392,40 @@ class ConfluenceClient {
   /**
    * Download an attachment's data stream
+   * Now uses the download link from attachment metadata instead of the broken REST API endpoint
    */
-  async downloadAttachment(pageIdOrUrl, attachmentId, options = {}) {
-    const pageId = this.extractPageId(pageIdOrUrl);
-    const response = await this.client.get(`/content/${pageId}/child/attachment/${attachmentId}/data`, {
-      responseType: options.responseType || 'stream'
+  async downloadAttachment(pageIdOrUrl, attachmentIdOrAttachment, options = {}) { // second argument: an attachment object carrying downloadLink, or a bare attachment ID
+    let downloadUrl;
+    // If the second argument is an attachment object with downloadLink, use it directly
+    if (typeof attachmentIdOrAttachment === 'object' && attachmentIdOrAttachment.downloadLink) { // NOTE(review): typeof null is also 'object', so a null argument would throw on .downloadLink here
+      downloadUrl = attachmentIdOrAttachment.downloadLink;
+    } else {
+      // Otherwise, fetch attachment info to get the download link
+      const pageId = this.extractPageId(pageIdOrUrl);
+      const attachmentId = attachmentIdOrAttachment;
+      const response = await this.client.get(`/content/${pageId}/child/attachment`, {
+        params: { limit: 500 } // NOTE(review): single request, no paging — presumably an attachment beyond the first 500 would be reported as not found; confirm
+      });
+      const attachment = response.data.results.find(att => att.id === String(attachmentId)); // String() lets a numeric ID match the id compared with ===
+      if (!attachment) {
+        throw new Error(`Attachment with ID ${attachmentId} not found on page ${pageId}`);
+      }
+      downloadUrl = this.toAbsoluteUrl(attachment._links?.download);
+    }
+    if (!downloadUrl) {
+      throw new Error('Unable to determine download URL for attachment');
+    }
+    // Download directly using axios with the same auth headers
+    const downloadResponse = await axios.get(downloadUrl, {
+      responseType: options.responseType || 'stream',
+      headers: { // the request bypasses this.client, so the Authorization header is rebuilt by hand
+        'Authorization': this.authType === 'basic' ? this.buildBasicAuthHeader() : `Bearer ${this.token}`
+      }
     });
-    return response.data;
+    return downloadResponse.data;
   }
   /**
@@ -402,14 +593,54 @@ class ConfluenceClient {
   /**
    * Convert Confluence storage format to markdown
+   * @param {string} storage - Confluence storage format HTML
+   * @param {object} options - Conversion options
+   * @param {string} options.attachmentsDir - Directory name for attachments (default: 'attachments')
    */
-  storageToMarkdown(storage) {
+  storageToMarkdown(storage, options = {}) {
+    const attachmentsDir = options.attachmentsDir || 'attachments';
     let markdown = storage;
     // Remove table of contents macro
     markdown = markdown.replace(/<ac:structured-macro ac:name="toc"[^>]*\s*\/>/g, '');
     markdown = markdown.replace(/<ac:structured-macro ac:name="toc"[^>]*>[\s\S]*?<\/ac:structured-macro>/g, '');
+    // Remove floatmenu macro (floating table of contents)
+    markdown = markdown.replace(/<ac:structured-macro ac:name="floatmenu"[^>]*>[\s\S]*?<\/ac:structured-macro>/g, '');
+    // Convert Confluence images to markdown images
+    // Format: <ac:image><ri:attachment ri:filename="image.png" /></ac:image>
+    markdown = markdown.replace(/<ac:image[^>]*>\s*<ri:attachment\s+ri:filename="([^"]+)"[^>]*\s*\/>\s*<\/ac:image>/g, (_, filename) => {
+      return `![${filename}](${attachmentsDir}/${filename})`;
+    });
+    // Also handle ri:attachment written with an explicit closing tag: <ac:image><ri:attachment ri:filename="image.png"></ri:attachment></ac:image>
+    markdown = markdown.replace(/<ac:image[^>]*><ri:attachment\s+ri:filename="([^"]+)"[^>]*><\/ri:attachment><\/ac:image>/g, (_, filename) => {
+      return `![${filename}](${attachmentsDir}/${filename})`;
+    });
+    // Convert mermaid macro to mermaid code block
+    markdown = markdown.replace(/<ac:structured-macro ac:name="mermaid-macro"[^>]*>[\s\S]*?<ac:plain-text-body><!\[CDATA\[([\s\S]*?)\]\]><\/ac:plain-text-body>[\s\S]*?<\/ac:structured-macro>/g, (_, code) => {
+      return `\n\`\`\`mermaid\n${code.trim()}\n\`\`\`\n`;
+    });
+    // Convert expand macro - extract content from rich-text-body
+    // Detect language based on content for the expand summary text
+    const detectExpandSummary = (text) => {
+      if (/[\u4e00-\u9fa5]/.test(text)) return '展开详情';      // Chinese
+      if (/[\u3040-\u309f\u30a0-\u30ff]/.test(text)) return '詳細を表示'; // Japanese
+      if (/[\uac00-\ud7af]/.test(text)) return '상세 보기';    // Korean
+      if (/[\u0400-\u04ff]/.test(text)) return 'Подробнее';    // Russian/Cyrillic
+      if (/[àâäéèêëïîôùûüÿœæç]/i.test(text)) return 'Détails'; // French
+      if (/[äöüß]/i.test(text)) return 'Details';              // German
+      if (/[áéíóúñ¿¡]/i.test(text)) return 'Detalles';         // Spanish
+      return 'Expand Details';  // Default: English
+    };
+    const expandSummary = detectExpandSummary(markdown);
+    markdown = markdown.replace(/<ac:structured-macro ac:name="expand"[^>]*>[\s\S]*?<ac:rich-text-body>([\s\S]*?)<\/ac:rich-text-body>[\s\S]*?<\/ac:structured-macro>/g, (_, content) => {
+      return `\n<details>\n<summary>${expandSummary}</summary>\n\n${content}\n\n</details>\n`;
+    });
     // Convert Confluence code macros to markdown
     markdown = markdown.replace(/<ac:structured-macro ac:name="code"[^>]*>[\s\S]*?<ac:parameter ac:name="language">([^<]*)<\/ac:parameter>[\s\S]*?<ac:plain-text-body><!\[CDATA\[([\s\S]*?)\]\]><\/ac:plain-text-body>[\s\S]*?<\/ac:structured-macro>/g, (_, lang, code) => {
       return `\`\`\`${lang}\n${code}\n\`\`\``;
@@ -438,12 +669,40 @@ class ConfluenceClient {
       return `[!note]\n${cleanContent}`;
     });
+    // Convert task list macros to markdown checkboxes
+    // Note: This is independent of user resolution - it only converts <ac:task> structure to "- [ ]" or "- [x]" format
+    markdown = markdown.replace(/<ac:task-list>([\s\S]*?)<\/ac:task-list>/g, (_, content) => {
+      const tasks = [];
+      // Match each task: <ac:task>...<ac:task-status>xxx</ac:task-status>...<ac:task-body>...</ac:task-body>...</ac:task>
+      const taskRegex = /<ac:task>[\s\S]*?<ac:task-status>([^<]*)<\/ac:task-status>[\s\S]*?<ac:task-body>([\s\S]*?)<\/ac:task-body>[\s\S]*?<\/ac:task>/g;
+      let match;
+      while ((match = taskRegex.exec(content)) !== null) {
+        const status = match[1];
+        let taskBody = match[2];
+        // Clean up HTML from task body, but preserve @username
+        taskBody = taskBody.replace(/<[^>]+>/g, '').replace(/\s+/g, ' ').trim();
+        const checkbox = status === 'complete' ? '[x]' : '[ ]';
+        if (taskBody) {
+          tasks.push(`- ${checkbox} ${taskBody}`);
+        }
+      }
+      return tasks.length > 0 ? '\n' + tasks.join('\n') + '\n' : '';
+    });
     // Remove other unhandled macros (replace with empty string for now)
     markdown = markdown.replace(/<ac:structured-macro[^>]*>[\s\S]*?<\/ac:structured-macro>/g, '');
-    // Convert links
+    // Convert external URL links
     markdown = markdown.replace(/<ac:link><ri:url ri:value="([^"]*)" \/><ac:plain-text-link-body><!\[CDATA\[([^\]]*)\]\]><\/ac:plain-text-link-body><\/ac:link>/g, '[$2]($1)');
+    // Convert internal page links - extract page title
+    // Format: <ac:link><ri:page ri:space-key="xxx" ri:content-title="Page Title" /></ac:link>
+    markdown = markdown.replace(/<ac:link>\s*<ri:page[^>]*ri:content-title="([^"]*)"[^>]*\/>\s*<\/ac:link>/g, '[$1]');
+    markdown = markdown.replace(/<ac:link>\s*<ri:page[^>]*ri:content-title="([^"]*)"[^>]*>\s*<\/ri:page>\s*<\/ac:link>/g, '[$1]');
+    // Remove any remaining ac:link tags that weren't matched
+    markdown = markdown.replace(/<ac:link>[\s\S]*?<\/ac:link>/g, '');
     // Convert remaining HTML to markdown
     markdown = this.htmlToMarkdown(markdown);
@@ -456,6 +715,10 @@ class ConfluenceClient {
   htmlToMarkdown(html) {
     let markdown = html;
+    // Convert time elements to date text BEFORE removing attributes
+    // Format: <time datetime="2025-09-16" /> or <time datetime="2025-09-16"></time>
+    markdown = markdown.replace(/<time\s+datetime="([^"]+)"[^>]*(?:\/>|>\s*<\/time>)/g, '$1');
     // Convert strong/bold BEFORE removing HTML attributes
     markdown = markdown.replace(/<strong[^>]*>(.*?)<\/strong>/g, '**$1**');
@@ -560,8 +823,8 @@ class ConfluenceClient {
     // Convert horizontal rules
     markdown = markdown.replace(/<hr\s*\/?>/g, '\n---\n');
-    // Remove any remaining HTML tags
-    markdown = markdown.replace(/<[^>]+>/g, ' ');
+    // Remove any remaining HTML tags, but preserve <details> and <summary> for GFM compatibility
+    markdown = markdown.replace(/<(?!\/?(details|summary)\b)[^>]+>/g, ' ');
     // Clean up whitespace and HTML entities
     markdown = markdown.replace(/&nbsp;/g, ' ');
@@ -569,10 +832,35 @@ class ConfluenceClient {
     markdown = markdown.replace(/&gt;/g, '>');
     markdown = markdown.replace(/&amp;/g, '&');
     markdown = markdown.replace(/&quot;/g, '"');
+    markdown = markdown.replace(/&apos;/g, '\'');
+    // Smart quotes and special characters
+    markdown = markdown.replace(/&ldquo;/g, '"');
+    markdown = markdown.replace(/&rdquo;/g, '"');
+    markdown = markdown.replace(/&lsquo;/g, '\'');
+    markdown = markdown.replace(/&rsquo;/g, '\'');
+    markdown = markdown.replace(/&mdash;/g, '—');
+    markdown = markdown.replace(/&ndash;/g, '–');
+    markdown = markdown.replace(/&hellip;/g, '...');
+    markdown = markdown.replace(/&bull;/g, '•');
+    markdown = markdown.replace(/&copy;/g, '©');
+    markdown = markdown.replace(/&reg;/g, '®');
+    markdown = markdown.replace(/&trade;/g, '™');
+    // Numeric HTML entities
+    markdown = markdown.replace(/&#(\d+);/g, (_, code) => String.fromCharCode(parseInt(code, 10)));
+    markdown = markdown.replace(/&#x([0-9a-fA-F]+);/g, (_, code) => String.fromCharCode(parseInt(code, 16)));
-    // Clean up extra whitespace
+    // Clean up extra whitespace for standard Markdown format
+    // Remove trailing spaces from each line
+    markdown = markdown.replace(/[ \t]+$/gm, '');
+    // Remove leading spaces from lines (except for code blocks, blockquotes, and list items)
+    markdown = markdown.replace(/^[ \t]+(?!([`>]|[*+-] |\d+[.)] ))/gm, '');
+    // Ensure every heading line (# Title) is followed by a blank line before the next content
+    markdown = markdown.replace(/^(#{1,6}[^\n]+)\n(?!\n)/gm, '$1\n\n');
+    // Normalize multiple blank lines to double newline
     markdown = markdown.replace(/\n\s*\n\s*\n+/g, '\n\n');
+    // Collapse multiple spaces to single space (but preserve newlines)
     markdown = markdown.replace(/[ \t]+/g, ' ');
+    // Final trim
     markdown = markdown.trim();
     return markdown;

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "confluence-cli",
-  "version": "1.10.0",
+  "version": "1.10.1",
   "description": "A command-line interface for Atlassian Confluence with page creation and editing capabilities",
   "main": "index.js",
   "bin": {