npm - n8n-nodes-notion-advanced - Versions diffs - 1.2.7-beta → 1.2.9-beta - Mend

n8n-nodes-notion-advanced 1.2.7-beta → 1.2.9-beta

This diff shows the differences between the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (9)

package/dist/nodes/NotionAdvanced/NotionAITool.node.d.ts +40 -1
package/dist/nodes/NotionAdvanced/NotionAITool.node.js +480 -120
package/dist/package.json +1 -1
package/package.json +1 -1
package/dist/nodes/NotionAdvanced/NotionAITool.node.ts +0 -611
package/dist/nodes/NotionAdvanced/NotionAdvanced.node.ts +0 -1022
package/dist/nodes/NotionAdvanced/NotionUtils.ts +0 -588
package/dist/nodes/NotionAdvanced/notion.svg +0 -3
package/dist/types/NotionTypes.ts +0 -411

package/dist/nodes/NotionAdvanced/NotionAITool.node.js CHANGED

@@ -2,6 +2,7 @@
 Object.defineProperty(exports, "__esModule", { value: true });
 exports.NotionAITool = void 0;
 const n8n_workflow_1 = require("n8n-workflow");
+const crypto_1 = require("crypto");
 const NotionUtils_1 = require("./NotionUtils");
 class NotionAITool {
     constructor() {
@@ -455,8 +456,8 @@ class NotionAITool {
         for (let i = 0; i < lines.length; i++) {
             const line = lines[i];
             const trimmedLine = line.trim();
-            // Skip completely empty lines and XML placeholders
-            if (!trimmedLine || trimmedLine.startsWith('__XML_BLOCK_'))
+            // Skip completely empty lines and XML placeholders (now using dynamic prefix check)
+            if (!trimmedLine || /__XML_[a-f0-9]{8}_\d+__/.test(trimmedLine))
                 continue;
             // Traditional markdown patterns (for backwards compatibility)
             if (trimmedLine.startsWith('# ')) {
@@ -641,328 +642,557 @@ class NotionAITool {
         }
         return blocks;
     }
-    // New XML-like tag processing function
+    // Helper function to resolve overlapping tag matches
+    static resolveOverlaps(matches) {
+        const resolved = [];
+        const sorted = matches.sort((a, b) => {
+            if (a.start !== b.start)
+                return a.start - b.start;
+            return (b.end - b.start) - (a.end - a.start); // Prefer longer matches
+        });
+        for (const match of sorted) {
+            const hasOverlap = resolved.some(existing => (match.start < existing.end && match.end > existing.start));
+            if (!hasOverlap) {
+                resolved.push(match);
+            }
+        }
+        return resolved;
+    }
+    // Helper function to validate XML tag structure
+    static validateXmlTag(match, tagName) {
+        try {
+            // Basic validation for well-formed tags
+            const openTag = new RegExp(`<${tagName}[^>]*>`, 'i');
+            const closeTag = new RegExp(`</${tagName}>`, 'i');
+            if (!openTag.test(match) || !closeTag.test(match)) {
+                console.warn(`Malformed XML tag detected: ${match.substring(0, 50)}...`);
+                return false;
+            }
+            return true;
+        }
+        catch (error) {
+            console.warn(`Error validating XML tag: ${error}`);
+            return false;
+        }
+    }
+    // Helper function for optimized string replacement
+    static optimizedReplace(content, matches) {
+        if (matches.length === 0)
+            return content;
+        const parts = [];
+        let lastIndex = 0;
+        matches.forEach(({ start, end, replacement }) => {
+            parts.push(content.substring(lastIndex, start));
+            parts.push(replacement);
+            lastIndex = end;
+        });
+        parts.push(content.substring(lastIndex));
+        return parts.join('');
+    }
+    // Helper function for Unicode-safe position calculation
+    static getUtf8BytePosition(str, charIndex) {
+        try {
+            return Buffer.from(str.substring(0, charIndex), 'utf8').length;
+        }
+        catch (error) {
+            // Fallback to character index if Buffer operations fail
+            return charIndex;
+        }
+    }
+    // Build hierarchical XML tree structure
+    static buildXMLTree(content, tagProcessors) {
+        const allMatches = [];
+        // Collect all XML tags with their positions
+        tagProcessors.forEach(({ regex, blockCreator, listProcessor }) => {
+            var _a;
+            const globalRegex = new RegExp(regex.source, 'gis');
+            let match;
+            while ((match = globalRegex.exec(content)) !== null) {
+                const tagName = ((_a = match[0].match(/<(\w+)/)) === null || _a === void 0 ? void 0 : _a[1]) || 'unknown';
+                allMatches.push({
+                    id: `${tagName}_${match.index}_${Date.now()}_${Math.random()}`,
+                    tagName,
+                    start: match.index,
+                    end: match.index + match[0].length,
+                    match: match[0],
+                    processor: blockCreator,
+                    groups: match.slice(1),
+                    children: [],
+                    depth: 0,
+                    innerContent: match[0],
+                    replacement: undefined,
+                    listProcessor
+                });
+            }
+        });
+        // Sort by start position
+        allMatches.sort((a, b) => a.start - b.start);
+        // Build parent-child relationships
+        const rootNodes = [];
+        const nodeStack = [];
+        for (const node of allMatches) {
+            // Pop nodes from stack that don't contain this node
+            while (nodeStack.length > 0 && nodeStack[nodeStack.length - 1].end <= node.start) {
+                nodeStack.pop();
+            }
+            // Set depth based on stack size
+            node.depth = nodeStack.length;
+            // If there's a parent on the stack, add this as its child
+            if (nodeStack.length > 0) {
+                const parent = nodeStack[nodeStack.length - 1];
+                node.parent = parent;
+                parent.children.push(node);
+            }
+            else {
+                // This is a root node
+                rootNodes.push(node);
+            }
+            // Only push self-contained tags to stack (not self-closing)
+            if (!node.match.endsWith('/>') && node.match.includes('</')) {
+                nodeStack.push(node);
+            }
+        }
+        return rootNodes;
+    }
+    // Process XML tree depth-first (children before parents)
+    static processXMLTreeDepthFirst(nodes, blocks, placeholderPrefix) {
+        const replacements = new Map();
+        let blockCounter = 0;
+        const processNode = (node) => {
+            // First, process all children depth-first
+            for (const child of node.children) {
+                const childReplacement = processNode(child);
+                replacements.set(child.id, childReplacement);
+            }
+            // Extract inner content (content between opening and closing tags)
+            let innerContent = node.innerContent;
+            // Extract content between opening and closing tags
+            const openTagMatch = node.match.match(/^<[^>]+>/);
+            const closeTagMatch = node.match.match(/<\/[^>]+>$/);
+            if (openTagMatch && closeTagMatch) {
+                const openTag = openTagMatch[0];
+                const closeTag = closeTagMatch[0];
+                const startIndex = node.match.indexOf(openTag) + openTag.length;
+                const endIndex = node.match.lastIndexOf(closeTag);
+                innerContent = node.match.substring(startIndex, endIndex);
+                // Replace child nodes in inner content with their processed content
+                for (const child of node.children) {
+                    const childReplacement = replacements.get(child.id) || '';
+                    innerContent = innerContent.replace(child.match, childReplacement);
+                }
+            }
+            // Process this node with updated inner content
+            try {
+                // Handle special list processors
+                if (node.listProcessor && (node.tagName === 'ul' || node.tagName === 'ol')) {
+                    node.listProcessor(innerContent, blocks);
+                    return `${placeholderPrefix}${blockCounter++}__`;
+                }
+                // Use blockCreator to create the block
+                const block = node.processor(...node.groups);
+                if (block) {
+                    blocks.push(block);
+                }
+                return `${placeholderPrefix}${blockCounter++}__`;
+            }
+            catch (error) {
+                console.warn(`Error processing XML node ${node.tagName}:`, error);
+                return node.match; // Return original if processing fails
+            }
+        };
+        // Process all root nodes
+        for (const rootNode of nodes) {
+            const replacement = processNode(rootNode);
+            replacements.set(rootNode.id, replacement);
+        }
+        return replacements;
+    }
+    // Apply hierarchical replacements to content
+    static applyHierarchicalReplacements(content, nodes, replacements) {
+        let processedContent = content;
+        // Sort nodes by start position in reverse order to avoid position shifts
+        const allNodes = this.getAllNodesFromTree(nodes);
+        allNodes.sort((a, b) => b.start - a.start);
+        // Apply replacements from end to beginning
+        for (const node of allNodes) {
+            const replacement = replacements.get(node.id);
+            if (replacement !== undefined) {
+                processedContent = processedContent.substring(0, node.start) +
+                    replacement +
+                    processedContent.substring(node.end);
+            }
+        }
+        return processedContent;
+    }
+    // Helper function to get all nodes from tree (flattened)
+    static getAllNodesFromTree(nodes) {
+        const allNodes = [];
+        const collectNodes = (nodeList) => {
+            for (const node of nodeList) {
+                allNodes.push(node);
+                collectNodes(node.children);
+            }
+        };
+        collectNodes(nodes);
+        return allNodes;
+    }
+    // New hierarchical XML-like tag processing function
     static processXmlTags(content, blocks) {
         let processedContent = content;
-        let blockCounter = 0;
-        // Process XML-like tags in order of priority
+        // Generate unique placeholder prefix to avoid collisions
+        const placeholderPrefix = `__XML_${(0, crypto_1.randomUUID)().slice(0, 8)}_`;
+        // Debug mode for development
+        const DEBUG_ORDERING = process.env.NODE_ENV === 'development';
+        // Define all tag processors
         const tagProcessors = [
             // Callouts: <callout type="info">content</callout>
             {
                 regex: /<callout\s*(?:type="([^"]*)")?\s*>(.*?)<\/callout>/gis,
-                processor: (match, type = 'info', content) => {
+                blockCreator: (type = 'info', content) => {
                     const emoji = NotionAITool.getCalloutEmoji(type.toLowerCase());
                     const color = NotionAITool.getCalloutColor(type.toLowerCase());
-                    blocks.push({
+                    return {
                         type: 'callout',
                         callout: {
                             rich_text: NotionAITool.parseBasicMarkdown(content.trim()),
                             icon: { type: 'emoji', emoji },
                             color: color,
                         },
-                    });
-                    return `__XML_BLOCK_${blockCounter++}__`;
+                    };
                 }
             },
             // Code blocks: <code language="javascript">content</code>
             {
                 regex: /<code\s*(?:language="([^"]*)")?\s*>(.*?)<\/code>/gis,
-                processor: (match, language = 'plain_text', content) => {
-                    blocks.push({
+                blockCreator: (language = 'plain_text', content) => {
+                    return {
                         type: 'code',
                         code: {
                             rich_text: [(0, NotionUtils_1.createRichText)(content.trim())],
                             language: language === 'plain text' ? 'plain_text' : language,
                         },
-                    });
-                    return `__XML_BLOCK_${blockCounter++}__`;
+                    };
                 }
             },
             // Images: <image src="url" alt="description">caption</image>
             {
                 regex: /<image\s+src="([^"]*)"(?:\s+alt="([^"]*)")?\s*>(.*?)<\/image>/gis,
-                processor: (match, src, alt = '', caption = '') => {
+                blockCreator: (src, alt = '', caption = '') => {
                     const captionText = caption.trim() || alt;
-                    blocks.push({
+                    return {
                         type: 'image',
                         image: {
                             type: 'external',
                             external: { url: src },
                             caption: captionText ? NotionAITool.parseBasicMarkdown(captionText) : [],
                         },
-                    });
-                    return `__XML_BLOCK_${blockCounter++}__`;
+                    };
                 }
             },
             // Self-closing images: <image src="url" alt="description"/>
             {
                 regex: /<image\s+src="([^"]*)"(?:\s+alt="([^"]*)")?\s*\/>/gis,
-                processor: (match, src, alt = '') => {
-                    blocks.push({
+                blockCreator: (src, alt = '') => {
+                    return {
                         type: 'image',
                         image: {
                             type: 'external',
                             external: { url: src },
                             caption: alt ? NotionAITool.parseBasicMarkdown(alt) : [],
                         },
-                    });
-                    return `__XML_BLOCK_${blockCounter++}__`;
+                    };
                 }
             },
             // Equations: <equation>E=mc^2</equation>
             {
                 regex: /<equation>(.*?)<\/equation>/gis,
-                processor: (match, expression) => {
-                    blocks.push({
+                blockCreator: (expression) => {
+                    return {
                         type: 'equation',
                         equation: {
                             expression: expression.trim(),
                         },
-                    });
-                    return `__XML_BLOCK_${blockCounter++}__`;
+                    };
                 }
             },
             // Embeds: <embed>url</embed>
             {
                 regex: /<embed>(.*?)<\/embed>/gis,
-                processor: (match, url) => {
-                    blocks.push({
+                blockCreator: (url) => {
+                    return {
                         type: 'embed',
                         embed: {
                             url: url.trim(),
                         },
-                    });
-                    return `__XML_BLOCK_${blockCounter++}__`;
+                    };
                 }
             },
             // Bookmarks: <bookmark>url</bookmark>
             {
                 regex: /<bookmark>(.*?)<\/bookmark>/gis,
-                processor: (match, url) => {
-                    blocks.push({
+                blockCreator: (url) => {
+                    return {
                         type: 'bookmark',
                         bookmark: {
                             url: url.trim(),
                         },
-                    });
-                    return `__XML_BLOCK_${blockCounter++}__`;
+                    };
                 }
             },
             // Toggles: <toggle>title</toggle>
             {
                 regex: /<toggle>(.*?)<\/toggle>/gis,
-                processor: (match, title) => {
-                    blocks.push({
+                blockCreator: (title) => {
+                    return {
                         type: 'toggle',
                         toggle: {
                             rich_text: NotionAITool.parseBasicMarkdown(title.trim()),
                             children: [],
                         },
-                    });
-                    return `__XML_BLOCK_${blockCounter++}__`;
+                    };
                 }
             },
             // Quotes: <quote>content</quote>
             {
                 regex: /<quote>(.*?)<\/quote>/gis,
-                processor: (match, content) => {
-                    blocks.push({
+                blockCreator: (content) => {
+                    return {
                         type: 'quote',
                         quote: {
                             rich_text: NotionAITool.parseBasicMarkdown(content.trim()),
                         },
-                    });
-                    return `__XML_BLOCK_${blockCounter++}__`;
+                    };
                 }
             },
             // Dividers: <divider/> or <divider></divider>
             {
                 regex: /<divider\s*\/?>/gis,
-                processor: (match) => {
-                    blocks.push({
+                blockCreator: () => {
+                    return {
                         type: 'divider',
                         divider: {},
-                    });
-                    return `__XML_BLOCK_${blockCounter++}__`;
+                    };
                 }
             },
             // To-do items: <todo checked="true">content</todo>
             {
                 regex: /<todo\s*(?:checked="([^"]*)")?\s*>(.*?)<\/todo>/gis,
-                processor: (match, checked = 'false', content) => {
+                blockCreator: (checked = 'false', content) => {
                     const isChecked = checked.toLowerCase() === 'true';
-                    blocks.push({
+                    return {
                         type: 'to_do',
                         to_do: {
                             rich_text: NotionAITool.parseBasicMarkdown(content.trim()),
                             checked: isChecked,
                         },
-                    });
-                    return `__XML_BLOCK_${blockCounter++}__`;
+                    };
                 }
             },
             // Headings: <h1>content</h1>, <h2>content</h2>, <h3>content</h3>
             {
                 regex: /<h([123])>(.*?)<\/h[123]>/gis,
-                processor: (match, level, content) => {
+                blockCreator: (level, content) => {
                     const headingType = `heading_${level}`;
-                    blocks.push({
+                    return {
                         type: headingType,
                         [headingType]: {
                             rich_text: [(0, NotionUtils_1.createRichText)(content.trim())],
                         },
-                    });
-                    return `__XML_BLOCK_${blockCounter++}__`;
+                    };
                 }
             },
             // Paragraphs: <p>content</p>
             {
                 regex: /<p>(.*?)<\/p>/gis,
-                processor: (match, content) => {
-                    blocks.push({
+                blockCreator: (content) => {
+                    return {
                         type: 'paragraph',
                         paragraph: {
                             rich_text: NotionAITool.parseBasicMarkdown(content.trim()),
                         },
-                    });
-                    return `__XML_BLOCK_${blockCounter++}__`;
+                    };
                 }
             },
             // Process complete bulleted lists first: <ul><li>item</li></ul>
             {
                 regex: /<ul\s*[^>]*>(.*?)<\/ul>/gis,
-                processor: (match, listContent) => {
-                    // Extract individual list items and process them
-                    const items = listContent.match(/<li\s*[^>]*>(.*?)<\/li>/gis) || [];
-                    items.forEach(item => {
-                        const itemContent = item.replace(/<\/?li[^>]*>/gi, '').trim();
-                        if (itemContent) {
-                            blocks.push({
-                                type: 'bulleted_list_item',
-                                bulleted_list_item: {
-                                    rich_text: NotionAITool.parseBasicMarkdown(itemContent),
-                                },
-                            });
-                        }
-                    });
-                    return `__XML_BLOCK_${blockCounter++}__`;
+                blockCreator: (listContent) => {
+                    // This will be handled specially in hierarchical processing
+                    return null;
+                },
+                listProcessor: (listContent, blocks) => {
+                    NotionAITool.processNestedList(listContent, 'bulleted_list_item', blocks);
                 }
             },
             // Process complete numbered lists first: <ol><li>item</li></ol>
             {
                 regex: /<ol\s*[^>]*>(.*?)<\/ol>/gis,
-                processor: (match, listContent) => {
-                    // Extract individual list items and process them
-                    const items = listContent.match(/<li\s*[^>]*>(.*?)<\/li>/gis) || [];
-                    items.forEach(item => {
-                        const itemContent = item.replace(/<\/?li[^>]*>/gi, '').trim();
-                        if (itemContent) {
-                            blocks.push({
-                                type: 'numbered_list_item',
-                                numbered_list_item: {
-                                    rich_text: NotionAITool.parseBasicMarkdown(itemContent),
-                                },
-                            });
-                        }
-                    });
-                    return `__XML_BLOCK_${blockCounter++}__`;
+                blockCreator: (listContent) => {
+                    // This will be handled specially in hierarchical processing
+                    return null;
+                },
+                listProcessor: (listContent, blocks) => {
+                    NotionAITool.processNestedList(listContent, 'numbered_list_item', blocks);
                 }
             },
             // Blockquotes: <blockquote>content</blockquote>
             {
                 regex: /<blockquote>(.*?)<\/blockquote>/gis,
-                processor: (match, content) => {
-                    blocks.push({
+                blockCreator: (content) => {
+                    return {
                         type: 'quote',
                         quote: {
                             rich_text: NotionAITool.parseBasicMarkdown(content.trim()),
                         },
-                    });
-                    return `__XML_BLOCK_${blockCounter++}__`;
+                    };
                 }
             },
             // Preformatted text: <pre>content</pre>
             {
                 regex: /<pre>(.*?)<\/pre>/gis,
-                processor: (match, content) => {
-                    blocks.push({
+                blockCreator: (content) => {
+                    return {
                         type: 'code',
                         code: {
                             rich_text: [(0, NotionUtils_1.createRichText)(content.trim())],
                             language: 'plain_text',
                         },
-                    });
-                    return `__XML_BLOCK_${blockCounter++}__`;
+                    };
                 }
             },
             // Standalone list items (only if not already processed in lists): <li>content</li>
             {
                 regex: /<li\s*[^>]*>(.*?)<\/li>/gis,
-                processor: (match, content) => {
+                blockCreator: (content) => {
                     if (content.trim()) {
-                        blocks.push({
+                        return {
                             type: 'bulleted_list_item',
                             bulleted_list_item: {
                                 rich_text: NotionAITool.parseBasicMarkdown(content.trim()),
                             },
-                        });
+                        };
                     }
-                    return `__XML_BLOCK_${blockCounter++}__`;
+                    return null;
                 }
             },
             // Strong/Bold: <strong>content</strong> or <b>content</b> (only as standalone)
             {
                 regex: /(?:^|>|\s)<(strong|b)>(.*?)<\/(strong|b)>(?=<|$|\s)/gis,
-                processor: (match, tag, content) => {
-                    blocks.push({
+                blockCreator: (tag, content) => {
+                    return {
                         type: 'paragraph',
                         paragraph: {
                             rich_text: NotionAITool.parseBasicMarkdown(`**${content.trim()}**`),
                         },
-                    });
-                    return `__XML_BLOCK_${blockCounter++}__`;
+                    };
                 }
             },
             // Emphasis/Italic: <em>content</em> or <i>content</i> (only as standalone)
             {
                 regex: /(?:^|>|\s)<(em|i)>(.*?)<\/(em|i)>(?=<|$|\s)/gis,
-                processor: (match, tag, content) => {
-                    blocks.push({
+                blockCreator: (tag, content) => {
+                    return {
                         type: 'paragraph',
                         paragraph: {
                             rich_text: NotionAITool.parseBasicMarkdown(`*${content.trim()}*`),
                         },
-                    });
-                    return `__XML_BLOCK_${blockCounter++}__`;
+                    };
                 }
             },
             // Line breaks: <br/> or <br>
             {
                 regex: /<br\s*\/?>/gis,
-                processor: (match) => {
-                    blocks.push({
+                blockCreator: () => {
+                    return {
                         type: 'paragraph',
                         paragraph: {
                             rich_text: [(0, NotionUtils_1.createRichText)('')],
                         },
-                    });
-                    return `__XML_BLOCK_${blockCounter++}__`;
+                    };
                 }
             },
         ];
-        // Process each tag type
-        tagProcessors.forEach(({ regex, processor }) => {
-            processedContent = processedContent.replace(regex, (match, group1, group2, group3) => {
-                return processor(match, group1 || '', group2 || '', group3 || '');
+        try {
+            // Step 1: Build hierarchical XML tree
+            const xmlTree = NotionAITool.buildXMLTree(processedContent, tagProcessors);
+            if (DEBUG_ORDERING && xmlTree.length > 0) {
+                console.log('XML Tree Structure:', xmlTree.map(node => ({
+                    tag: node.tagName,
+                    depth: node.depth,
+                    children: node.children.length,
+                    start: node.start
+                })));
+            }
+            // Step 2: Process tree depth-first (children before parents)
+            const replacements = NotionAITool.processXMLTreeDepthFirst(xmlTree, blocks, placeholderPrefix);
+            // Step 3: Apply hierarchical replacements to content
+            processedContent = NotionAITool.applyHierarchicalReplacements(processedContent, xmlTree, replacements);
+            // Step 4: Clean up any remaining HTML tags
+            processedContent = NotionAITool.cleanupRemainingHtml(processedContent, placeholderPrefix);
+            if (DEBUG_ORDERING) {
+                console.log(`Processed ${xmlTree.length} root XML nodes hierarchically, created ${blocks.length} blocks`);
+            }
+        }
+        catch (error) {
+            console.warn('Error in hierarchical XML processing, falling back to linear processing:', error);
+            // Fallback to linear processing if hierarchical fails
+            const allMatches = [];
+            tagProcessors.forEach(({ regex, blockCreator }) => {
+                const globalRegex = new RegExp(regex.source, 'gis');
+                let match;
+                while ((match = globalRegex.exec(processedContent)) !== null) {
+                    allMatches.push({
+                        start: match.index,
+                        end: match.index + match[0].length,
+                        match: match[0],
+                        processor: (match, group1, group2, group3) => {
+                            try {
+                                const block = blockCreator(group1 || '', group2 || '', group3 || '');
+                                if (block) {
+                                    blocks.push(block);
+                                }
+                                return `${placeholderPrefix}${Math.random()}__`;
+                            }
+                            catch (error) {
+                                console.warn('Error in fallback processor:', error);
+                                return match;
+                            }
+                        },
+                        groups: match.slice(1)
+                    });
+                }
             });
-        });
-        // Clean up any remaining HTML tags that weren't processed
-        processedContent = NotionAITool.cleanupRemainingHtml(processedContent);
+            const resolvedMatches = NotionAITool.resolveOverlaps(allMatches);
+            resolvedMatches.sort((a, b) => a.start - b.start);
+            const processedMatches = resolvedMatches.map(({ start, end, match, processor, groups }) => {
+                try {
+                    const replacement = processor(match, groups[0] || '', groups[1] || '', groups[2] || '');
+                    return { start, end, replacement, match };
+                }
+                catch (error) {
+                    return { start, end, replacement: match, match };
+                }
+            });
+            if (processedMatches.length > 0) {
+                processedContent = NotionAITool.optimizedReplace(processedContent, processedMatches);
+            }
+            processedContent = NotionAITool.cleanupRemainingHtml(processedContent, placeholderPrefix);
+        }
         return processedContent;
     }
-    // Cleanup function to remove remaining HTML tags
-    static cleanupRemainingHtml(content) {
+    // Cleanup function to remove remaining HTML tags and XML_BLOCK artifacts
+    static cleanupRemainingHtml(content, placeholderPrefix) {
         let cleaned = content;
+        // Remove XML_BLOCK placeholder artifacts (support both old and new format)
+        if (placeholderPrefix) {
+            const placeholderRegex = new RegExp(`${placeholderPrefix.replace(/[.*+?^${}()|[\]\\]/g, '\\$&')}\\d+__`, 'g');
+            cleaned = cleaned.replace(placeholderRegex, '');
+        }
+        else {
+            // Fallback for backward compatibility
+            cleaned = cleaned.replace(/__XML_BLOCK_\d+__/g, '');
+            cleaned = cleaned.replace(/__XML_[a-f0-9]{8}_\d+__/g, '');
+        }
         // Remove common HTML tags that might be left behind
         const htmlTagsToRemove = [
             /<\/?ul\s*[^>]*>/gi,
@@ -974,6 +1204,13 @@ class NotionAITool {
             /<\/?i\s*[^>]*>/gi,
             /<\/?div\s*[^>]*>/gi,
             /<\/?span\s*[^>]*>/gi,
+            /<\/?p\s*[^>]*>/gi,
+            /<\/?a\s*[^>]*>/gi,
+            /<\/?code\s*[^>]*>/gi,
+            /<\/?u\s*[^>]*>/gi,
+            /<\/?s\s*[^>]*>/gi,
+            /<\/?del\s*[^>]*>/gi,
+            /<\/?mark\s*[^>]*>/gi,
             /<br\s*\/?>/gi,
         ];
         htmlTagsToRemove.forEach(regex => {
@@ -983,8 +1220,131 @@ class NotionAITool {
         cleaned = cleaned.replace(/^\s*[\r\n]/gm, '');
         // Remove multiple consecutive line breaks
         cleaned = cleaned.replace(/\n{3,}/g, '\n\n');
+        // Remove lines that contain only XML_BLOCK artifacts
+        cleaned = cleaned.replace(/^.*__XML_BLOCK_\d+__.*$/gm, '');
+        cleaned = cleaned.replace(/^.*__XML_[a-f0-9]{8}_\d+__.*$/gm, '');
         return cleaned.trim();
     }
+    // Helper: converts the inline HTML of one list item into markdown-style text; takes a string and returns a trimmed, single-spaced string
+    static processNestedHtmlInListItem(content) {
+        let processed = content;
+        // Unwrap an outer paragraph, but only when the whole string starts with a <p ...> tag and ends with </p> (pattern is anchored with ^ and $)
+        processed = processed.replace(/^<p\s*[^>]*>(.*?)<\/p>$/gis, '$1');
+        // Paired formatting tags and their markdown replacements; every pattern is lazy, case-insensitive and dot-all, so a tag body may span lines
+        const htmlToMarkdown = [
+            { regex: /<strong\s*[^>]*>(.*?)<\/strong>/gis, replacement: '**$1**' },
+            { regex: /<b\s*[^>]*>(.*?)<\/b>/gis, replacement: '**$1**' }, // NOTE(review): no word boundary after the tag name, so this opener can also match <br> or <body>
+            { regex: /<em\s*[^>]*>(.*?)<\/em>/gis, replacement: '*$1*' },
+            { regex: /<i\s*[^>]*>(.*?)<\/i>/gis, replacement: '*$1*' },
+            { regex: /<code\s*[^>]*>(.*?)<\/code>/gis, replacement: '`$1`' },
+            { regex: /<a\s+href="([^"]*)"[^>]*>(.*?)<\/a>/gis, replacement: '[$2]($1)' }, // matches only when href is the first attribute and is double-quoted
+            { regex: /<u\s*[^>]*>(.*?)<\/u>/gis, replacement: '$1' }, // Notion doesn't support underline
+            { regex: /<s\s*[^>]*>(.*?)<\/s>/gis, replacement: '~~$1~~' },
+            { regex: /<del\s*[^>]*>(.*?)<\/del>/gis, replacement: '~~$1~~' },
+            { regex: /<mark\s*[^>]*>(.*?)<\/mark>/gis, replacement: '$1' }, // Notion doesn't support highlight in rich text
+        ];
+        // Run the conversions one after another, in the array order above
+        htmlToMarkdown.forEach(({ regex, replacement }) => {
+            processed = processed.replace(regex, replacement);
+        });
+        // Leftover structural tags (div, span, p, br) that have no markdown equivalent here
+        const tagsToRemove = [
+            /<\/?div\s*[^>]*>/gi,
+            /<\/?span\s*[^>]*>/gi,
+            /<\/?p\s*[^>]*>/gi,
+            /<br\s*\/?>/gi,
+        ];
+        tagsToRemove.forEach(regex => {
+            processed = processed.replace(regex, ' '); // a space rather than an empty string, so text on either side of the tag stays separated
+        });
+        // Collapse every whitespace run (newlines included) into one space, then trim both ends
+        processed = processed.replace(/\s+/g, ' ').trim();
+        return processed;
+    }
+    // Helper: scans listContent for <li> items and pushes one block of type listType per non-empty item onto the blocks array (mutated in place, nothing returned); nested ul/ol items go into the same array, so nesting depth is flattened
+    static processNestedList(listContent, listType, blocks) {
+        // Collect each top-level <li>...</li> substring by scanning for tags and tracking nesting depth
+        const items = [];
+        let currentPos = 0;
+        while (currentPos < listContent.length) {
+            const liStart = listContent.indexOf('<li', currentPos); // NOTE(review): case-sensitive, and also matches any other tag that begins with <li
+            if (liStart === -1)
+                break;
+            const liEndTag = listContent.indexOf('>', liStart);
+            if (liEndTag === -1)
+                break;
+            // Locate the closing tag of this item: an opening <li found first raises depth, a closing tag lowers it, and depth 0 marks the match
+            let depth = 1;
+            let searchPos = liEndTag + 1;
+            let liEnd = -1;
+            while (searchPos < listContent.length && depth > 0) {
+                const nextLiStart = listContent.indexOf('<li', searchPos);
+                const nextLiEnd = listContent.indexOf('</li>', searchPos);
+                if (nextLiEnd === -1)
+                    break;
+                if (nextLiStart !== -1 && nextLiStart < nextLiEnd) {
+                    depth++;
+                    searchPos = nextLiStart + 3; // step over the 3 characters of the opening <li
+                }
+                else {
+                    depth--;
+                    if (depth === 0) {
+                        liEnd = nextLiEnd;
+                    }
+                    searchPos = nextLiEnd + 5; // step over the 5 characters of the closing tag
+                }
+            }
+            if (liEnd === -1)
+                break;
+            // Slice out the whole item, closing tag included (5 is the length of the closing tag)
+            const fullItem = listContent.substring(liStart, liEnd + 5);
+            items.push(fullItem);
+            currentPos = liEnd + 5;
+        }
+        // Turn each collected item into zero or more blocks
+        items.forEach(item => {
+            // Strip the outer opening and closing li tags, keeping only the inner HTML
+            let itemContent = item.replace(/^<li[^>]*>/, '').replace(/<\/li>$/, '').trim();
+            // True when the inner HTML contains an opening ul or ol tag
+            const hasNestedList = /<[uo]l\s*[^>]*>/i.test(itemContent);
+            if (hasNestedList) {
+                // Keep only the text ahead of the first nested list tag; everything from that tag to the end of the string is cut
+                const beforeNestedList = itemContent.replace(/<[uo]l\s*[^>]*>.*$/is, '').trim();
+                if (beforeNestedList) {
+                    // Normalize inline HTML and, if any text remains, emit the parent item as its own block
+                    const cleanContent = NotionAITool.processNestedHtmlInListItem(beforeNestedList);
+                    if (cleanContent) {
+                        blocks.push({
+                            type: listType,
+                            [listType]: {
+                                rich_text: NotionAITool.parseBasicMarkdown(cleanContent),
+                            },
+                        });
+                    }
+                }
+                // Match the first nested list only; the body is lazy, so it stops at the first closing tag with the same name
+                const nestedListMatch = itemContent.match(/<([uo]l)\s*[^>]*>(.*?)<\/\1>/is);
+                if (nestedListMatch) {
+                    const [, nestedListTag, nestedContent] = nestedListMatch;
+                    const nestedListType = nestedListTag === 'ul' ? 'bulleted_list_item' : 'numbered_list_item'; // NOTE(review): this comparison is case-sensitive although the match above is not, so an uppercase UL maps to numbered
+                    // Recurse with the same blocks array, so nested items land after their parent at the same level
+                    NotionAITool.processNestedList(nestedContent, nestedListType, blocks);
+                }
+            }
+            else {
+                // No nested list: normalize the item and emit it as a single block when non-empty
+                const cleanContent = NotionAITool.processNestedHtmlInListItem(itemContent);
+                if (cleanContent) {
+                    blocks.push({
+                        type: listType,
+                        [listType]: {
+                            rich_text: NotionAITool.parseBasicMarkdown(cleanContent),
+                        },
+                    });
+                }
+            }
+        });
+    }
     // Helper function to get callout emoji based on type
     static getCalloutEmoji(type) {
         const emojiMap = {