npm - confluence-cli - Versions diffs - 1.31.0 → 1.32.0 - Mend

confluence-cli 1.31.0 → 1.32.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (8)

package/README.md +54 -2
package/bin/confluence.js +53 -24
package/lib/confluence-client.js +179 -588
package/lib/html-to-markdown.js +150 -0
package/lib/macro-converter.js +298 -0
package/npm-shrinkwrap.json +2 -2
package/package.json +1 -1
package/plugins/confluence/skills/confluence/SKILL.md +6 -4

package/lib/confluence-client.js CHANGED

@@ -4,28 +4,30 @@ const https = require('https');
 const path = require('path');
 const FormData = require('form-data');
 const { convert } = require('html-to-text');
-const MarkdownIt = require('markdown-it');
-const NAMED_ENTITIES = {
-  // uppercase variants
-  aring: 'å', auml: 'ä', ouml: 'ö',
-  eacute: 'é', egrave: 'è', ecirc: 'ê', euml: 'ë',
-  aacute: 'á', agrave: 'à', acirc: 'â', atilde: 'ã',
-  oacute: 'ó', ograve: 'ò', ocirc: 'ô', otilde: 'õ',
-  uacute: 'ú', ugrave: 'ù', ucirc: 'û', uuml: 'ü',
-  iacute: 'í', igrave: 'ì', icirc: 'î', iuml: 'ï',
-  ntilde: 'ñ', ccedil: 'ç', szlig: 'ß', yuml: 'ÿ',
-  eth: 'ð', thorn: 'þ',
-  // uppercase variants
-  Aring: 'Å', Auml: 'Ä', Ouml: 'Ö',
-  Eacute: 'É', Egrave: 'È', Ecirc: 'Ê', Euml: 'Ë',
-  Aacute: 'Á', Agrave: 'À', Acirc: 'Â', Atilde: 'Ã',
-  Oacute: 'Ó', Ograve: 'Ò', Ocirc: 'Ô', Otilde: 'Õ',
-  Uacute: 'Ú', Ugrave: 'Ù', Ucirc: 'Û', Uuml: 'Ü',
-  Iacute: 'Í', Igrave: 'Ì', Icirc: 'Î', Iuml: 'Ï',
-  Ntilde: 'Ñ', Ccedil: 'Ç', Szlig: 'SS', Yuml: 'Ÿ',
-  Eth: 'Ð', Thorn: 'Þ'
-};
+const MacroConverter = require('./macro-converter');
+const { htmlToMarkdown, NAMED_ENTITIES } = require('./html-to-markdown');
+function createSemaphore(limit) {
+  let active = 0;
+  const waiters = [];
+  return {
+    async acquire() {
+      if (active < limit) {
+        active++;
+        return;
+      }
+      await new Promise(resolve => waiters.push(resolve));
+    },
+    release() {
+      if (waiters.length > 0) {
+        const next = waiters.shift();
+        next();
+      } else {
+        active--;
+      }
+    }
+  };
+}
 class ConfluenceClient {
   constructor(config) {
@@ -39,10 +41,14 @@ class ConfluenceClient {
     this.forceCloud = !!config.forceCloud;
     this.mtls = config.mtls;
     this.apiPath = this.sanitizeApiPath(config.apiPath);
-    this.webUrlPrefix = this.apiPath.startsWith('/wiki/') ? '/wiki' : '';
+    this.webUrlPrefix = this.apiPath.includes('/wiki/') ? '/wiki' : '';
     this.baseURL = `${this.protocol}://${this.domain}${this.apiPath}`;
-    this.markdown = new MarkdownIt();
-    this.setupConfluenceMarkdownExtensions();
+    this.converter = new MacroConverter({
+      isCloud: this.isCloud(),
+      webUrlPrefix: this.webUrlPrefix,
+      buildUrl: (pathOrUrl) => this.buildUrl(pathOrUrl),
+    });
+    this.markdown = this.converter.markdown;
     const headers = {
       'Content-Type': 'application/json',
@@ -318,7 +324,7 @@ class ConfluenceClient {
   /**
    * Read a Confluence page content
    * @param {string} pageIdOrUrl - Page ID or URL
-   * @param {string} format - Output format: 'text', 'html', or 'markdown'
+   * @param {string} format - Output format: 'text', 'html', 'storage', or 'markdown'
    * @param {object} options - Additional options
    * @param {boolean} options.resolveUsers - Whether to resolve userkeys to display names (default: true for markdown)
    * @param {boolean} options.extractReferencedAttachments - Whether to extract referenced attachments (default: false)
@@ -339,7 +345,7 @@ class ConfluenceClient {
       this._referencedAttachments = this.extractReferencedAttachments(htmlContent);
     }
-    if (format === 'html') {
+    if (format === 'html' || format === 'storage') {
       return htmlContent;
     }
@@ -383,17 +389,11 @@ class ConfluenceClient {
     const response = await this.client.get(`/content/${pageId}`, {
       params: {
-        expand: 'space'
+        expand: 'space,history,version,ancestors'
       }
     });
-    return {
-      title: response.data.title,
-      id: response.data.id,
-      type: response.data.type,
-      status: response.data.status,
-      space: response.data.space
-    };
+    return this.normalizePage(response.data);
   }
   /**
@@ -540,7 +540,7 @@ class ConfluenceClient {
         const webui = page._links?.webui || '';
         return {
           title: page.title,
-          url: webui ? this.buildUrl(`${this.webUrlPrefix}${webui}`) : ''
+          url: webui ? this.toAbsoluteUrl(webui, page._links?.base) : ''
         };
       }
       return null;
@@ -634,7 +634,7 @@ class ConfluenceClient {
       // Format: - [Page Title](URL)
       const childPagesList = childPages.map(page => {
         const webui = page._links?.webui || '';
-        const url = webui ? this.buildUrl(`${this.webUrlPrefix}${webui}`) : '';
+        const url = webui ? this.toAbsoluteUrl(webui, page._links?.base) : '';
         if (url) {
           return `- [${page.title}](${url})`;
         } else {
@@ -1160,550 +1160,32 @@ class ConfluenceClient {
     return { pageId: String(pageId), key };
   }
-  /**
-   * Convert markdown to Confluence storage format
-   */
   markdownToStorage(markdown) {
-    // Convert markdown to HTML first
-    const html = this.markdown.render(markdown);
-    // Convert HTML to native Confluence storage format elements
-    return this.htmlToConfluenceStorage(html);
+    return this.converter.markdownToStorage(markdown);
   }
-  /**
-   * Convert HTML to native Confluence storage format
-   */
   htmlToConfluenceStorage(html) {
-    let storage = html;
-    // Convert headings to native Confluence format
-    storage = storage.replace(/<h([1-6])>(.*?)<\/h[1-6]>/g, '<h$1>$2</h$1>');
-    // Convert paragraphs
-    storage = storage.replace(/<p>(.*?)<\/p>/g, '<p>$1</p>');
-    // Convert strong/bold text
-    storage = storage.replace(/<strong>(.*?)<\/strong>/g, '<strong>$1</strong>');
-    // Convert emphasis/italic text
-    storage = storage.replace(/<em>(.*?)<\/em>/g, '<em>$1</em>');
-    // Convert unordered lists
-    storage = storage.replace(/<ul>(.*?)<\/ul>/gs, '<ul>$1</ul>');
-    storage = storage.replace(/<li>(.*?)<\/li>/g, '<li><p>$1</p></li>');
-    // Convert ordered lists
-    storage = storage.replace(/<ol>(.*?)<\/ol>/gs, '<ol>$1</ol>');
-    // Convert code blocks to Confluence code macro
-    storage = storage.replace(/<pre><code(?:\s+class="language-(\w+)")?>(.*?)<\/code><\/pre>/gs, (_, lang, code) => {
-      const language = lang || 'text';
-      // Trim trailing newline added by markdown-it during HTML rendering,
-      // and decode HTML entities that markdown-it encodes inside <code> blocks
-      // so they appear as literal characters in the CDATA output
-      const decodedCode = code.replace(/\n$/, '')
-        .replace(/&quot;/g, '"')
-        .replace(/&lt;/g, '<')
-        .replace(/&gt;/g, '>')
-        .replace(/&amp;/g, '&');   // & last to avoid double-decoding
-      const safeCode = decodedCode.replace(/]]>/g, ']]]]><![CDATA[>');
-      return `<ac:structured-macro ac:name="code"><ac:parameter ac:name="language">${language}</ac:parameter><ac:plain-text-body><![CDATA[${safeCode}]]></ac:plain-text-body></ac:structured-macro>`;
-    });
-    // Convert inline code
-    storage = storage.replace(/<code>(.*?)<\/code>/g, '<code>$1</code>');
-    // Convert blockquotes to appropriate macros based on content
-    storage = storage.replace(/<blockquote>(.*?)<\/blockquote>/gs, (_, content) => {
-      // Check for admonition patterns
-      if (content.includes('<strong>INFO</strong>')) {
-        const cleanContent = content.replace(/<p><strong>INFO<\/strong><\/p>\s*/, '');
-        return `<ac:structured-macro ac:name="info">
-          <ac:rich-text-body>${cleanContent}</ac:rich-text-body>
-        </ac:structured-macro>`;
-      } else if (content.includes('<strong>WARNING</strong>')) {
-        const cleanContent = content.replace(/<p><strong>WARNING<\/strong><\/p>\s*/, '');
-        return `<ac:structured-macro ac:name="warning">
-          <ac:rich-text-body>${cleanContent}</ac:rich-text-body>
-        </ac:structured-macro>`;
-      } else if (content.includes('<strong>NOTE</strong>')) {
-        const cleanContent = content.replace(/<p><strong>NOTE<\/strong><\/p>\s*/, '');
-        return `<ac:structured-macro ac:name="note">
-          <ac:rich-text-body>${cleanContent}</ac:rich-text-body>
-        </ac:structured-macro>`;
-      } else {
-        // Default to info macro for regular blockquotes
-        return `<ac:structured-macro ac:name="info">
-          <ac:rich-text-body>${content}</ac:rich-text-body>
-        </ac:structured-macro>`;
-      }
-    });
-    // Convert tables
-    storage = storage.replace(/<table>(.*?)<\/table>/gs, '<table>$1</table>');
-    storage = storage.replace(/<thead>(.*?)<\/thead>/gs, '<thead>$1</thead>');
-    storage = storage.replace(/<tbody>(.*?)<\/tbody>/gs, '<tbody>$1</tbody>');
-    storage = storage.replace(/<tr>(.*?)<\/tr>/gs, '<tr>$1</tr>');
-    storage = storage.replace(/<th>(.*?)<\/th>/g, '<th><p>$1</p></th>');
-    storage = storage.replace(/<td>(.*?)<\/td>/g, '<td><p>$1</p></td>');
-    // Convert links
-    // Confluence Cloud does not render ac:link + ri:url; use smart links instead.
-    // Server/Data Center instances continue to use the ac:link storage format.
-    if (this.isCloud()) {
-      storage = storage.replace(/<a href="(.*?)">(.*?)<\/a>/g, '<a href="$1" data-card-appearance="inline">$2</a>');
-    } else {
-      storage = storage.replace(/<a href="(.*?)">(.*?)<\/a>/g, '<ac:link><ri:url ri:value="$1" /><ac:plain-text-link-body><![CDATA[$2]]></ac:plain-text-link-body></ac:link>');
-    }
-    // Convert horizontal rules
-    storage = storage.replace(/<hr\s*\/?>/g, '<hr />');
-    // Note: Do NOT globally decode &lt; &gt; &amp; here. These represent literal
-    // characters in user content (e.g. <placeholder> in inline text) and
-    // Confluence storage format renders them correctly as-is. Code block
-    // entities are decoded separately above before CDATA insertion.
-    return storage;
+    return this.converter.htmlToConfluenceStorage(html);
   }
-  /**
-   * Convert markdown to Confluence storage format using native storage format
-   */
   markdownToNativeStorage(markdown) {
-    // Convert markdown to HTML first
-    const html = this.markdown.render(markdown);
-    // Delegate to htmlToConfluenceStorage for proper conversion including code blocks
-    return this.htmlToConfluenceStorage(html);
+    return this.converter.markdownToNativeStorage(markdown);
   }
-  /**
-   * Setup Confluence-specific markdown extensions
-   */
   setupConfluenceMarkdownExtensions() {
-    // Enable additional markdown-it features
-    this.markdown.enable(['table', 'strikethrough', 'linkify']);
-    // Add custom rule for Confluence macros in markdown
-    this.markdown.core.ruler.before('normalize', 'confluence_macros', (state) => {
-      const src = state.src;
-      // Convert [!info] admonitions to info macro
-      state.src = src.replace(/\[!info\]\s*([\s\S]*?)(?=\n\s*\n|\n\s*\[!|$)/g, (_, content) => {
-        return `> **INFO**\n> ${content.trim().replace(/\n/g, '\n> ')}`;
-      });
-      // Convert [!warning] admonitions to warning macro
-      state.src = state.src.replace(/\[!warning\]\s*([\s\S]*?)(?=\n\s*\n|\n\s*\[!|$)/g, (_, content) => {
-        return `> **WARNING**\n> ${content.trim().replace(/\n/g, '\n> ')}`;
-      });
-      // Convert [!note] admonitions to note macro
-      state.src = state.src.replace(/\[!note\]\s*([\s\S]*?)(?=\n\s*\n|\n\s*\[!|$)/g, (_, content) => {
-        return `> **NOTE**\n> ${content.trim().replace(/\n/g, '\n> ')}`;
-      });
-      // Convert task lists to proper format
-      state.src = state.src.replace(/^(\s*)- \[([ x])\] (.+)$/gm, (_, indent, checked, text) => {
-        return `${indent}- [${checked}] ${text}`;
-      });
-    });
+    this.converter.setupConfluenceMarkdownExtensions();
   }
-  /**
-   * Detect language from text content and return appropriate labels
-   * @param {string} text - Text content to analyze
-   * @returns {object} Object with language-specific labels
-   */
   detectLanguageLabels(text) {
-    const labels = {
-      includePage: 'Include Page',
-      sharedBlock: 'Shared Block',
-      includeSharedBlock: 'Include Shared Block',
-      fromPage: 'from page',
-      expandDetails: 'Expand Details'
-    };
-    if (/[\u4e00-\u9fa5]/.test(text)) {
-      // Chinese
-      labels.includePage = '包含页面';
-      labels.sharedBlock = '共享块';
-      labels.includeSharedBlock = '包含共享块';
-      labels.fromPage = '来自页面';
-      labels.expandDetails = '展开详情';
-    } else if (/[\u3040-\u309f\u30a0-\u30ff]/.test(text)) {
-      // Japanese
-      labels.includePage = 'ページを含む';
-      labels.sharedBlock = '共有ブロック';
-      labels.includeSharedBlock = '共有ブロックを含む';
-      labels.fromPage = 'ページから';
-      labels.expandDetails = '詳細を表示';
-    } else if (/[\uac00-\ud7af]/.test(text)) {
-      // Korean
-      labels.includePage = '페이지 포함';
-      labels.sharedBlock = '공유 블록';
-      labels.includeSharedBlock = '공유 블록 포함';
-      labels.fromPage = '페이지에서';
-      labels.expandDetails = '상세 보기';
-    } else if (/[\u0400-\u04ff]/.test(text)) {
-      // Russian/Cyrillic
-      labels.includePage = 'Включить страницу';
-      labels.sharedBlock = 'Общий блок';
-      labels.includeSharedBlock = 'Включить общий блок';
-      labels.fromPage = 'со страницы';
-      labels.expandDetails = 'Подробнее';
-    } else if ((text.match(/[àâäéèêëïîôùûüÿœæç]/gi) || []).length >= 2) {
-      // French (requires at least 2 French-specific characters to avoid false positives)
-      labels.includePage = 'Inclure la page';
-      labels.sharedBlock = 'Bloc partagé';
-      labels.includeSharedBlock = 'Inclure le bloc partagé';
-      labels.fromPage = 'de la page';
-      labels.expandDetails = 'Détails';
-    } else if ((text.match(/[äöüß]/gi) || []).length >= 2) {
-      // German (requires at least 2 German-specific characters)
-      // Note: French is checked before German because French regex includes more characters
-      // that overlap with German (ä, ü). The threshold helps distinguish between them.
-      labels.includePage = 'Seite einbinden';
-      labels.sharedBlock = 'Gemeinsamer Block';
-      labels.includeSharedBlock = 'Gemeinsamen Block einbinden';
-      labels.fromPage = 'von Seite';
-      labels.expandDetails = 'Details';
-    } else if ((text.match(/[áéíóúñ¿¡]/gi) || []).length >= 2) {
-      // Spanish (requires at least 2 Spanish-specific characters)
-      labels.includePage = 'Incluir página';
-      labels.sharedBlock = 'Bloque compartido';
-      labels.includeSharedBlock = 'Incluir bloque compartido';
-      labels.fromPage = 'de la página';
-      labels.expandDetails = 'Detalles';
-    }
-    return labels;
+    return this.converter.detectLanguageLabels(text);
   }
-  /**
-   * Convert Confluence storage format to markdown
-   * @param {string} storage - Confluence storage format HTML
-   * @param {object} options - Conversion options
-   * @param {string} options.attachmentsDir - Directory name for attachments (default: 'attachments')
-   */
   storageToMarkdown(storage, options = {}) {
-    const attachmentsDir = options.attachmentsDir || 'attachments';
-    let markdown = storage;
-    // Detect language from content
-    const labels = this.detectLanguageLabels(markdown);
-    // Remove table of contents macro
-    markdown = markdown.replace(/<ac:structured-macro ac:name="toc"[^>]*\s*\/>/g, '');
-    markdown = markdown.replace(/<ac:structured-macro ac:name="toc"[^>]*>[\s\S]*?<\/ac:structured-macro>/g, '');
-    // Remove floatmenu macro (floating table of contents)
-    markdown = markdown.replace(/<ac:structured-macro ac:name="floatmenu"[^>]*>[\s\S]*?<\/ac:structured-macro>/g, '');
-    // Convert Confluence images to markdown images
-    // Format: <ac:image><ri:attachment ri:filename="image.png" /></ac:image>
-    markdown = markdown.replace(/<ac:image[^>]*>\s*<ri:attachment\s+ri:filename="([^"]+)"[^>]*\s*\/>\s*<\/ac:image>/g, (_, filename) => {
-      return `![${filename}](${attachmentsDir}/${filename})`;
-    });
-    // Also handle self-closing ac:image with ri:attachment
-    markdown = markdown.replace(/<ac:image[^>]*><ri:attachment\s+ri:filename="([^"]+)"[^>]*><\/ri:attachment><\/ac:image>/g, (_, filename) => {
-      return `![${filename}](${attachmentsDir}/${filename})`;
-    });
-    // Convert mermaid macro to mermaid code block
-    markdown = markdown.replace(/<ac:structured-macro ac:name="mermaid-macro"[^>]*>[\s\S]*?<ac:plain-text-body><!\[CDATA\[([\s\S]*?)\]\]><\/ac:plain-text-body>[\s\S]*?<\/ac:structured-macro>/g, (_, code) => {
-      return `\n\`\`\`mermaid\n${code.trim()}\n\`\`\`\n`;
-    });
-    // Convert expand macro - extract content from rich-text-body
-    markdown = markdown.replace(/<ac:structured-macro ac:name="expand"[^>]*>[\s\S]*?<ac:rich-text-body>([\s\S]*?)<\/ac:rich-text-body>[\s\S]*?<\/ac:structured-macro>/g, (_, content) => {
-      return `\n<details>\n<summary>${labels.expandDetails}</summary>\n\n${content}\n\n</details>\n`;
-    });
-    // Convert Confluence code macros to markdown
-    markdown = markdown.replace(/<ac:structured-macro ac:name="code"[^>]*>[\s\S]*?<ac:parameter ac:name="language">([^<]*)<\/ac:parameter>[\s\S]*?<ac:plain-text-body><!\[CDATA\[([\s\S]*?)\]\]><\/ac:plain-text-body>[\s\S]*?<\/ac:structured-macro>/g, (_, lang, code) => {
-      return `\n\`\`\`${lang}\n${code}\n\`\`\`\n`;
-    });
-    // Convert code macros without language parameter
-    markdown = markdown.replace(/<ac:structured-macro ac:name="code"[^>]*>[\s\S]*?<ac:plain-text-body><!\[CDATA\[([\s\S]*?)\]\]><\/ac:plain-text-body>[\s\S]*?<\/ac:structured-macro>/g, (_, code) => {
-      return `\n\`\`\`\n${code}\n\`\`\`\n`;
-    });
-    // Convert info macro to admonition
-    markdown = markdown.replace(/<ac:structured-macro ac:name="info"[^>]*>[\s\S]*?<ac:rich-text-body>([\s\S]*?)<\/ac:rich-text-body>[\s\S]*?<\/ac:structured-macro>/g, (_, content) => {
-      const cleanContent = this.htmlToMarkdown(content);
-      return `[!info]\n${cleanContent}`;
-    });
-    // Convert warning macro to admonition
-    markdown = markdown.replace(/<ac:structured-macro ac:name="warning"[^>]*>[\s\S]*?<ac:rich-text-body>([\s\S]*?)<\/ac:rich-text-body>[\s\S]*?<\/ac:structured-macro>/g, (_, content) => {
-      const cleanContent = this.htmlToMarkdown(content);
-      return `[!warning]\n${cleanContent}`;
-    });
-    // Convert note macro to admonition
-    markdown = markdown.replace(/<ac:structured-macro ac:name="note"[^>]*>[\s\S]*?<ac:rich-text-body>([\s\S]*?)<\/ac:rich-text-body>[\s\S]*?<\/ac:structured-macro>/g, (_, content) => {
-      const cleanContent = this.htmlToMarkdown(content);
-      return `[!note]\n${cleanContent}`;
-    });
-    // Convert task list macros to markdown checkboxes
-    // Note: This is independent of user resolution - it only converts <ac:task> structure to "- [ ]" or "- [x]" format
-    markdown = markdown.replace(/<ac:task-list>([\s\S]*?)<\/ac:task-list>/g, (_, content) => {
-      const tasks = [];
-      // Match each task: <ac:task>...<ac:task-status>xxx</ac:task-status>...<ac:task-body>...</ac:task-body>...</ac:task>
-      const taskRegex = /<ac:task>[\s\S]*?<ac:task-status>([^<]*)<\/ac:task-status>[\s\S]*?<ac:task-body>([\s\S]*?)<\/ac:task-body>[\s\S]*?<\/ac:task>/g;
-      let match;
-      while ((match = taskRegex.exec(content)) !== null) {
-        const status = match[1];
-        let taskBody = match[2];
-        // Clean up HTML from task body, but preserve @username
-        taskBody = taskBody.replace(/<[^>]+>/g, '').replace(/\s+/g, ' ').trim();
-        const checkbox = status === 'complete' ? '[x]' : '[ ]';
-        if (taskBody) {
-          tasks.push(`- ${checkbox} ${taskBody}`);
-        }
-      }
-      return tasks.length > 0 ? '\n' + tasks.join('\n') + '\n' : '';
-    });
-    // Convert panel macro to markdown blockquote with title
-    markdown = markdown.replace(/<ac:structured-macro ac:name="panel"[^>]*>[\s\S]*?<ac:parameter ac:name="title">([^<]*)<\/ac:parameter>[\s\S]*?<ac:rich-text-body>([\s\S]*?)<\/ac:rich-text-body>[\s\S]*?<\/ac:structured-macro>/g, (_, title, content) => {
-      const cleanContent = this.htmlToMarkdown(content);
-      return `\n> **${title}**\n>\n${cleanContent.split('\n').map(line => line ? `> ${line}` : '>').join('\n')}\n`;
-    });
-    // Convert include macro - extract page link and convert to markdown link
-    // Handle both with and without parameter name
-    markdown = markdown.replace(/<ac:structured-macro ac:name="include"[^>]*>[\s\S]*?<ac:parameter ac:name="">[\s\S]*?<ac:link>[\s\S]*?<ri:page\s+ri:space-key="([^"]+)"\s+ri:content-title="([^"]+)"[^>]*\/>[\s\S]*?<\/ac:link>[\s\S]*?<\/ac:parameter>[\s\S]*?<\/ac:structured-macro>/g, (_, spaceKey, title) => {
-      // Try to build a proper URL - if spaceKey starts with ~, it's a user space
-      if (spaceKey.startsWith('~')) {
-        const spacePath = `display/${spaceKey}/${encodeURIComponent(title)}`;
-        return `\n> 📄 **${labels.includePage}**: [${title}](${this.buildUrl(`${this.webUrlPrefix}/${spacePath}`)})\n`;
-      } else {
-        // For non-user spaces, we cannot construct a valid link without the page ID.
-        // Document that manual correction is required.
-        return `\n> 📄 **${labels.includePage}**: [${title}](${this.buildUrl(`${this.webUrlPrefix}/spaces/${spaceKey}/pages/[PAGE_ID_HERE]`)}) _(manual link correction required)_\n`;
-      }
-    });
-    // Convert shared-block and include-shared-block macros - extract content
-    markdown = markdown.replace(/<ac:structured-macro ac:name="(shared-block|include-shared-block)"[^>]*>[\s\S]*?<ac:parameter ac:name="shared-block-key">([^<]*)<\/ac:parameter>[\s\S]*?<ac:rich-text-body>([\s\S]*?)<\/ac:rich-text-body>[\s\S]*?<\/ac:structured-macro>/g, (_, macroType, blockKey, content) => {
-      const cleanContent = this.htmlToMarkdown(content);
-      return `\n> **${labels.sharedBlock}: ${blockKey}**\n>\n${cleanContent.split('\n').map(line => line ? `> ${line}` : '>').join('\n')}\n`;
-    });
-    // Convert include-shared-block with page parameter
-    markdown = markdown.replace(/<ac:structured-macro ac:name="include-shared-block"[^>]*>[\s\S]*?<ac:parameter ac:name="shared-block-key">([^<]*)<\/ac:parameter>[\s\S]*?<ac:parameter ac:name="page">[\s\S]*?<ac:link>[\s\S]*?<ri:page\s+ri:space-key="([^"]+)"\s+ri:content-title="([^"]+)"[^>]*\/>[\s\S]*?<\/ac:link>[\s\S]*?<\/ac:parameter>[\s\S]*?<\/ac:structured-macro>/g, (_, blockKey, spaceKey, pageTitle) => {
-      // The page ID is not available, so we cannot generate a valid link.
-      // Instead, document that the link needs manual correction.
-      return `\n> 📄 **${labels.includeSharedBlock}**: ${blockKey} (${labels.fromPage}: ${pageTitle} [link needs manual correction])\n`;
-    });
-    // Convert view-file macro to file link
-    // Handle both orders: name first or height first
-    markdown = markdown.replace(/<ac:structured-macro ac:name="view-file"[^>]*>[\s\S]*?<ac:parameter ac:name="name">[\s\S]*?<ri:attachment\s+ri:filename="([^"]+)"[^>]*\/>[\s\S]*?<\/ac:parameter>[\s\S]*?<\/ac:structured-macro>/g, (_, filename) => {
-      return `\n📎 [${filename}](${attachmentsDir}/${filename})\n`;
-    });
-    // Also handle view-file with height parameter (which might appear after name)
-    markdown = markdown.replace(/<ac:structured-macro ac:name="view-file"[^>]*>[\s\S]*?<ac:parameter ac:name="name">[\s\S]*?<ri:attachment\s+ri:filename="([^"]+)"[^>]*\/>[\s\S]*?<\/ac:parameter>[\s\S]*?<ac:parameter ac:name="height">([^<]*)<\/ac:parameter>[\s\S]*?<\/ac:structured-macro>/g, (_, filename, _height) => {
-      return `\n📎 [${filename}](${attachmentsDir}/${filename})\n`;
-    });
-    // Remove layout macros but preserve content
-    markdown = markdown.replace(/<ac:layout>/g, '');
-    markdown = markdown.replace(/<\/ac:layout>/g, '');
-    markdown = markdown.replace(/<ac:layout-section[^>]*>/g, '');
-    markdown = markdown.replace(/<\/ac:layout-section>/g, '');
-    markdown = markdown.replace(/<ac:layout-cell[^>]*>/g, '');
-    markdown = markdown.replace(/<\/ac:layout-cell>/g, '');
-    // Remove other unhandled macros (replace with empty string for now)
-    markdown = markdown.replace(/<ac:structured-macro[^>]*>[\s\S]*?<\/ac:structured-macro>/g, '');
-    // Convert external URL links
-    markdown = markdown.replace(/<ac:link><ri:url ri:value="([^"]*)" \/><ac:plain-text-link-body><!\[CDATA\[([^\]]*)\]\]><\/ac:plain-text-link-body><\/ac:link>/g, '[$2]($1)');
-    // Convert internal page links - extract page title
-    // Format: <ac:link><ri:page ri:space-key="xxx" ri:content-title="Page Title" /></ac:link>
-    markdown = markdown.replace(/<ac:link>\s*<ri:page[^>]*ri:content-title="([^"]*)"[^>]*\/>\s*<\/ac:link>/g, '[$1]');
-    markdown = markdown.replace(/<ac:link>\s*<ri:page[^>]*ri:content-title="([^"]*)"[^>]*>\s*<\/ri:page>\s*<\/ac:link>/g, '[$1]');
-    // Convert internal page links with custom display text (ac:link-body)
-    markdown = markdown.replace(/<ac:link[^>]*>[\s\S]*?<ac:link-body>([\s\S]*?)<\/ac:link-body>[\s\S]*?<\/ac:link>/g, '$1');
-    // Remove any remaining ac:link tags that weren't matched (including those with attributes)
-    markdown = markdown.replace(/<ac:link[^>]*>[\s\S]*?<\/ac:link>/g, '');
-    // Convert remaining HTML to markdown
-    markdown = this.htmlToMarkdown(markdown);
-    return markdown;
+    return this.converter.storageToMarkdown(storage, options);
   }
-  /**
-   * Convert basic HTML to markdown
-   */
   htmlToMarkdown(html) {
-    let markdown = html;
-    // Convert time elements to date text BEFORE removing attributes
-    // Format: <time datetime="2025-09-16" /> or <time datetime="2025-09-16"></time>
-    markdown = markdown.replace(/<time\s+datetime="([^"]+)"[^>]*(?:\/>|>\s*<\/time>)/g, '$1');
-    // Convert strong/bold BEFORE removing HTML attributes
-    markdown = markdown.replace(/<strong[^>]*>(.*?)<\/strong>/g, '**$1**');
-    // Convert emphasis/italic BEFORE removing HTML attributes
-    markdown = markdown.replace(/<em[^>]*>(.*?)<\/em>/g, '*$1*');
-    // Convert code BEFORE removing HTML attributes
-    markdown = markdown.replace(/<code[^>]*>(.*?)<\/code>/g, '`$1`');
-    // Remove HTML attributes from tags (but preserve content formatting)
-    markdown = markdown.replace(/<(\w+)[^>]*>/g, '<$1>');
-    markdown = markdown.replace(/<\/(\w+)[^>]*>/g, '</$1>');
-    // Convert headings first (they don't contain other elements typically)
-    markdown = markdown.replace(/<h([1-6])>(.*?)<\/h[1-6]>/g, (_, level, text) => {
-      return '\n' + '#'.repeat(parseInt(level)) + ' ' + text.trim() + '\n';
-    });
-    // Convert tables BEFORE paragraphs
-    markdown = markdown.replace(/<table>(.*?)<\/table>/gs, (_, content) => {
-      const rows = [];
-      let isHeader = true;
-      // Extract table rows
-      const rowMatches = content.match(/<tr>(.*?)<\/tr>/gs);
-      if (rowMatches) {
-        rowMatches.forEach(rowMatch => {
-          const cells = [];
-          const cellContent = rowMatch.replace(/<tr>(.*?)<\/tr>/s, '$1');
-          // Extract cells (th or td)
-          const cellMatches = cellContent.match(/<t[hd]>(.*?)<\/t[hd]>/gs);
-          if (cellMatches) {
-            cellMatches.forEach(cellMatch => {
-              let cellText = cellMatch.replace(/<t[hd]>(.*?)<\/t[hd]>/s, '$1');
-              // Clean up cell content - remove nested HTML but preserve text and some formatting
-              cellText = cellText.replace(/<p>/g, '').replace(/<\/p>/g, ' ');
-              cellText = cellText.replace(/<[^>]+>/g, ' ').replace(/\s+/g, ' ').trim();
-              cells.push(cellText || ' ');
-            });
-          }
-          if (cells.length > 0) {
-            rows.push('| ' + cells.join(' | ') + ' |');
-            if (isHeader) {
-              rows.push('| ' + cells.map(() => '---').join(' | ') + ' |');
-              isHeader = false;
-            }
-          }
-        });
-      }
-      return rows.length > 0 ? '\n' + rows.join('\n') + '\n' : '';
-    });
-    // Convert unordered lists BEFORE paragraphs
-    markdown = markdown.replace(/<ul>(.*?)<\/ul>/gs, (_, content) => {
-      let listItems = '';
-      const itemMatches = content.match(/<li>(.*?)<\/li>/gs);
-      if (itemMatches) {
-        itemMatches.forEach(itemMatch => {
-          let itemText = itemMatch.replace(/<li>(.*?)<\/li>/s, '$1');
-          // Clean up nested HTML but preserve some formatting
-          itemText = itemText.replace(/<p>/g, '').replace(/<\/p>/g, ' ');
-          itemText = itemText.replace(/<[^>]+>/g, ' ').replace(/\s+/g, ' ').trim();
-          if (itemText) {
-            listItems += '- ' + itemText + '\n';
-          }
-        });
-      }
-      return '\n' + listItems;
-    });
-    // Convert ordered lists BEFORE paragraphs
-    markdown = markdown.replace(/<ol>(.*?)<\/ol>/gs, (_, content) => {
-      let listItems = '';
-      let counter = 1;
-      const itemMatches = content.match(/<li>(.*?)<\/li>/gs);
-      if (itemMatches) {
-        itemMatches.forEach(itemMatch => {
-          let itemText = itemMatch.replace(/<li>(.*?)<\/li>/s, '$1');
-          // Clean up nested HTML but preserve some formatting
-          itemText = itemText.replace(/<p>/g, '').replace(/<\/p>/g, ' ');
-          itemText = itemText.replace(/<[^>]+>/g, ' ').replace(/\s+/g, ' ').trim();
-          if (itemText) {
-            listItems += `${counter++}. ${itemText}\n`;
-          }
-        });
-      }
-      return '\n' + listItems;
-    });
-    // Convert paragraphs (after lists and tables)
-    markdown = markdown.replace(/<p>(.*?)<\/p>/gs, (_, content) => {
-      return '\n' + content.trim() + '\n';
-    });
-    // Convert line breaks
-    markdown = markdown.replace(/<br\s*\/?>/g, '\n');
-    // Convert horizontal rules
-    markdown = markdown.replace(/<hr\s*\/?>/g, '\n---\n');
-    // Remove any remaining HTML tags, but preserve <details> and <summary> for GFM compatibility
-    markdown = markdown.replace(/<(?!\/?(details|summary)\b)[^>]+>/g, ' ');
-    // Clean up whitespace and HTML entities
-    markdown = markdown.replace(/&nbsp;/g, ' ');
-    markdown = markdown.replace(/&lt;/g, '<');
-    markdown = markdown.replace(/&gt;/g, '>');
-    markdown = markdown.replace(/&amp;/g, '&');
-    markdown = markdown.replace(/&quot;/g, '"');
-    markdown = markdown.replace(/&apos;/g, '\'');
-    // Smart quotes and special characters
-    markdown = markdown.replace(/&ldquo;/g, '"');
-    markdown = markdown.replace(/&rdquo;/g, '"');
-    markdown = markdown.replace(/&lsquo;/g, '\'');
-    markdown = markdown.replace(/&rsquo;/g, '\'');
-    markdown = markdown.replace(/&mdash;/g, '—');
-    markdown = markdown.replace(/&ndash;/g, '–');
-    markdown = markdown.replace(/&hellip;/g, '...');
-    markdown = markdown.replace(/&bull;/g, '•');
-    markdown = markdown.replace(/&copy;/g, '©');
-    markdown = markdown.replace(/&reg;/g, '®');
-    markdown = markdown.replace(/&trade;/g, '™');
-    // Numeric HTML entities
-    markdown = markdown.replace(/&#(\d+);/g, (_, code) => String.fromCharCode(parseInt(code, 10)));
-    markdown = markdown.replace(/&#x([0-9a-fA-F]+);/g, (_, code) => String.fromCharCode(parseInt(code, 16)));
-    // Clean up nordic alphabets and other named entities
-    markdown = markdown.replace(/&([a-zA-Z]+);/g, (match, name) => NAMED_ENTITIES[name] || match);
-    // Clean up extra whitespace for standard Markdown format
-    // Remove trailing spaces from each line
-    markdown = markdown.replace(/[ \t]+$/gm, '');
-    // Remove leading spaces from lines (except for code blocks, blockquotes, and list items)
-    markdown = markdown.replace(/^[ \t]+(?!([`>]|[*+-] |\d+[.)] ))/gm, '');
-    // Ensure proper spacing after headings (# Title should be followed by blank line or content)
-    markdown = markdown.replace(/^(#{1,6}[^\n]+)\n(?!\n)/gm, '$1\n\n');
-    // Normalize multiple blank lines to double newline
-    markdown = markdown.replace(/\n\s*\n\s*\n+/g, '\n\n');
-    // Collapse multiple spaces to single space (but preserve newlines)
-    markdown = markdown.replace(/[ \t]+/g, ' ');
-    // Final trim
-    markdown = markdown.trim();
-    return markdown;
+    return htmlToMarkdown(html);
   }
   /**
@@ -1936,48 +1418,61 @@ class ConfluenceClient {
   /**
    * Get child pages of a given page
+   *
+   * Issues a single GET to `/content/{pageId}/child/page` and maps every entry
+   * of `response.data.results` through `normalizePage`, forcing `parentId` to
+   * the requested page and `depth` to 1.
+   *
+   * @param {string|number} pageId - ID of the page whose direct children are listed.
+   * @param {number} [limit=500] - Sent as the `limit` query parameter. Only one
+   *   request is made here, so nothing beyond that first response is fetched.
+   * @param {Object} [options={}]
+   * @param {boolean} [options.includeAncestors] - When truthy, `ancestors` is
+   *   appended to the `expand` query parameter.
+   * @returns {Promise<Object[]>} One normalized page record per direct child.
    */
-  async getChildPages(pageId, limit = 500) {
+  async getChildPages(pageId, limit = 500, options = {}) {
+    const includeAncestors = Boolean(options.includeAncestors);
     const response = await this.client.get(`/content/${pageId}/child/page`, {
       params: {
         limit: limit,
         // Fetch lightweight payload; content fetched on-demand when copying
-        expand: 'space,version'
+        expand: includeAncestors ? 'space,version,ancestors' : 'space,version'
       }
     });
-    return response.data.results.map(page => ({
-      id: page.id,
-      title: page.title,
-      type: page.type,
-      status: page.status,
-      space: page.space,
-      version: page.version?.number || 1
+    return response.data.results.map(page => this.normalizePage(page, {
+      parentId: pageId,
+      depth: 1
     }));
   }
   /**
    * Get all descendant pages recursively
+   *
+   * Creates one semaphore with `createSemaphore(10)` and hands the traversal to
+   * `_collectDescendants`. That helper fetches the children of `pageId` while
+   * holding the semaphore (released in a `finally`), then recurses into every
+   * child concurrently through `Promise.all`, passing the same semaphore down.
+   *
+   * @param {string|number} pageId - Root of the traversal; the root page itself
+   *   is not part of the result.
+   * @param {number} [maxDepth=10] - The helper returns `[]` as soon as
+   *   `currentDepth >= maxDepth`.
+   * @param {number|Object} [currentDepth=0] - Starting depth. A non-null object
+   *   passed in this position is treated as `options` and the depth resets to 0.
+   * @param {Object} [options={}] - Forwarded unchanged to `getChildPages`.
+   * @returns {Promise<Object[]>} The direct children first, followed by each
+   *   child's descendants. Every entry carries `parentId` (the child's own value,
+   *   else `String(pageId)`) and `depth` set to `currentDepth + 1`.
    */
-  async getAllDescendantPages(pageId, maxDepth = 10, currentDepth = 0) {
+  async getAllDescendantPages(pageId, maxDepth = 10, currentDepth = 0, options = {}) {
+    if (typeof currentDepth === 'object' && currentDepth !== null) {
+      options = currentDepth;
+      currentDepth = 0;
+    }
+    const semaphore = createSemaphore(10); // one instance for the whole traversal; presumably caps concurrent child-page requests at 10 — confirm against createSemaphore
+    return this._collectDescendants(pageId, maxDepth, currentDepth, semaphore, options);
+  }
+  async _collectDescendants(pageId, maxDepth, currentDepth, semaphore, options = {}) {
     if (currentDepth >= maxDepth) {
       return [];
     }
-    const children = await this.getChildPages(pageId);
-    // Attach parentId so we can later reconstruct hierarchy if needed
-    const childrenWithParent = children.map(child => ({ ...child, parentId: pageId }));
-    let allDescendants = [...childrenWithParent];
-    for (const child of children) {
-      const grandChildren = await this.getAllDescendantPages(
-        child.id,
-        maxDepth,
-        currentDepth + 1
-      );
-      allDescendants = allDescendants.concat(grandChildren);
+    await semaphore.acquire();
+    let children;
+    try {
+      children = await this.getChildPages(pageId, 500, options);
+    } finally {
+      semaphore.release();
     }
-    return allDescendants;
+    // Track depth for recursive JSON output while preserving direct parent linkage.
+    const childrenWithDepth = children.map(child => ({
+      ...child,
+      parentId: child.parentId || String(pageId),
+      depth: currentDepth + 1
+    }));
+    const grandChildrenLists = await Promise.all(
+      children.map(child =>
+        this._collectDescendants(child.id, maxDepth, currentDepth + 1, semaphore, options)
+      )
+    );
+    return childrenWithDepth.concat(...grandChildrenLists);
   }
   /**
@@ -2197,12 +1692,104 @@ class ConfluenceClient {
     };
   }
+  normalizeUser(user) {
+    if (!user) {
+      return null;
+    }
+    return {
+      displayName: user.displayName || user.publicName || user.username || user.userKey || user.accountId || 'Unknown',
+      accountId: user.accountId,
+      userKey: user.userKey,
+      username: user.username,
+      email: user.email
+    };
+  }
+  normalizeAncestors(rawAncestors = []) {
+    if (!Array.isArray(rawAncestors)) {
+      return [];
+    }
+    return rawAncestors.map((ancestor) => {
+      const id = ancestor?.id ?? ancestor;
+      return {
+        id: id !== undefined && id !== null ? String(id) : null,
+        type: ancestor?.type || null,
+        title: ancestor?.title || null
+      };
+    }).filter((ancestor) => ancestor.id);
+  }
+  normalizeSpace(space) {
+    if (!space) {
+      return null;
+    }
+    return {
+      key: space.key || null,
+      name: space.name || null
+    };
+  }
+  getPageParentId(ancestors = []) {
+    if (!Array.isArray(ancestors) || ancestors.length === 0) {
+      return null;
+    }
+    return ancestors[ancestors.length - 1].id || null;
+  }
+  normalizePage(raw, overrides = {}) {
+    const space = overrides.space === undefined
+      ? this.normalizeSpace(raw?.space)
+      : this.normalizeSpace(overrides.space);
+    const ancestors = overrides.ancestors || this.normalizeAncestors(raw?.ancestors);
+    const spaceKey = overrides.spaceKey || space?.key || null;
+    const id = raw?.id !== undefined && raw?.id !== null ? String(raw.id) : null;
+    const webui = raw?._links?.webui || null;
+    const linksBase = raw?._links?.base || null;
+    const fallbackUrl = (spaceKey && id)
+      ? `${this.webUrlPrefix}/spaces/${spaceKey}/pages/${id}`
+      : null;
+    return {
+      id,
+      title: raw?.title || '',
+      type: raw?.type || null,
+      status: raw?.status || null,
+      space,
+      spaceKey,
+      parentId: overrides.parentId === undefined
+        ? this.getPageParentId(ancestors)
+        : (overrides.parentId === null ? null : String(overrides.parentId)),
+      version: overrides.version !== undefined ? overrides.version : (raw?.version?.number || null),
+      url: overrides.url || this.toAbsoluteUrl(webui, linksBase) || (fallbackUrl ? this.buildUrl(fallbackUrl) : null),
+      ancestors,
+      depth: overrides.depth,
+      author: overrides.author !== undefined ? overrides.author : this.normalizeUser(raw?.history?.createdBy),
+      lastUpdatedBy: overrides.lastUpdatedBy !== undefined ? overrides.lastUpdatedBy : this.normalizeUser(raw?.version?.by),
+      createdAt: overrides.createdAt !== undefined ? overrides.createdAt : (raw?.history?.createdDate || null),
+      updatedAt: overrides.updatedAt !== undefined ? overrides.updatedAt : (raw?.version?.when || null)
+    };
+  }
   buildUrl(path) {
     const normalized = path && !path.startsWith('/') ? `/${path}` : (path || '');
     return `${this.protocol}://${this.domain}${normalized}`;
   }
-  toAbsoluteUrl(pathOrUrl) {
+  joinBaseUrl(baseUrl, path) {
+    if (!baseUrl) {
+      return null;
+    }
+    const normalizedBase = baseUrl.endsWith('/') ? baseUrl.slice(0, -1) : baseUrl;
+    const normalizedPath = path.startsWith('/') ? path : `/${path}`;
+    return `${normalizedBase}${normalizedPath}`;
+  }
+  toAbsoluteUrl(pathOrUrl, baseUrl = null) {
     if (!pathOrUrl) {
       return null;
     }
@@ -2211,6 +1798,10 @@ class ConfluenceClient {
       return pathOrUrl;
     }
+    if (baseUrl) {
+      return this.joinBaseUrl(baseUrl, pathOrUrl);
+    }
     const pathWithPrefix = this.webUrlPrefix && !pathOrUrl.startsWith(this.webUrlPrefix)
       ? `${this.webUrlPrefix}${pathOrUrl}`
       : pathOrUrl;
@@ -2242,8 +1833,8 @@ class ConfluenceClient {
 /**
  * Create a ConfluenceClient-shaped object for conversion work only.
  *
  * The instance is made with `Object.create(ConfluenceClient.prototype)`, so
  * the class constructor never runs and nothing the constructor would set is
  * present (presumably the connection settings — confirm against the
  * constructor). Only the converter and its markdown renderer are attached.
  *
  * @returns {ConfluenceClient} Instance exposing `converter` and `markdown`.
  */
 ConfluenceClient.createLocalConverter = function () {
   const instance = Object.create(ConfluenceClient.prototype);
-  instance.markdown = new MarkdownIt();
-  instance.setupConfluenceMarkdownExtensions();
+  instance.converter = new MacroConverter();
+  instance.markdown = instance.converter.markdown;
   return instance;
 };