npm - n8n-nodes-notion-advanced - Version diff - 1.2.28-beta → 1.2.29-beta - Mend

n8n-nodes-notion-advanced 1.2.28-beta → 1.2.29-beta

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (3)

package/nodes/NotionAdvanced/NotionAITool.node.d.ts +7 -0
package/nodes/NotionAdvanced/NotionAITool.node.js +273 -123
package/package.json +1 -1

package/nodes/NotionAdvanced/NotionAITool.node.d.ts CHANGED Viewed

@@ -55,6 +55,13 @@ export declare class NotionAITool implements INodeType {
     static processNestedHtmlInListItem(content: string): string;
     static convertInlineHtmlToMarkdown(content: string): string;
     static processNestedList(listContent: string, listType: 'bulleted_list_item' | 'numbered_list_item', blocks: IDataObject[]): void;
+    static extractListItemsWithBranching(content: string): Array<{
+        text: string;
+        children: Array<{
+            type: string;
+            content: string;
+        }>;
+    }>;
     static extractListItems(content: string): string[];
     static getCalloutEmoji(type: string): string;
     static getCalloutColor(type: string): string;

package/nodes/NotionAdvanced/NotionAITool.node.js CHANGED Viewed

@@ -745,40 +745,77 @@ class NotionAITool {
             return charIndex;
         }
     }
-    // Enhanced hierarchical XML tree structure that catches ALL XML content
+    // Enhanced hierarchical XML tree structure using depth-aware parsing
     static buildXMLTree(content, tagProcessors) {
         var _a;
         const allMatches = [];
-        const processedRanges = [];
-        // Step 1: Collect all XML tags with specific processors
+        // Step 1: Use depth-aware parsing for each tag processor
         tagProcessors.forEach(({ regex, blockCreator, listProcessor }) => {
             var _a;
-            const globalRegex = new RegExp(regex.source, 'gis');
-            let match;
-            while ((match = globalRegex.exec(content)) !== null) {
-                const tagName = ((_a = match[0].match(/<(\w+)/)) === null || _a === void 0 ? void 0 : _a[1]) || 'unknown';
-                const xmlNode = {
-                    id: `${tagName}_${match.index}_${Date.now()}_${Math.random()}`,
-                    tagName,
-                    start: match.index,
-                    end: match.index + match[0].length,
-                    match: match[0],
-                    processor: blockCreator,
-                    groups: match.slice(1),
-                    children: [],
-                    depth: 0,
-                    innerContent: match[0],
-                    replacement: undefined,
-                    listProcessor
-                };
-                allMatches.push(xmlNode);
-                processedRanges.push({ start: xmlNode.start, end: xmlNode.end });
+            const tagPattern = (_a = regex.source.match(/<(\w+)/)) === null || _a === void 0 ? void 0 : _a[1];
+            if (!tagPattern)
+                return;
+            // Find all opening tags of this type
+            let pos = 0;
+            while (pos < content.length) {
+                const openTagStart = content.indexOf(`<${tagPattern}`, pos);
+                if (openTagStart === -1)
+                    break;
+                const openTagEnd = content.indexOf('>', openTagStart);
+                if (openTagEnd === -1)
+                    break;
+                // Find matching closing tag using depth tracking
+                let depth = 1;
+                let searchPos = openTagEnd + 1;
+                let closeTagStart = -1;
+                const openPattern = `<${tagPattern}`;
+                const closePattern = `</${tagPattern}>`;
+                while (searchPos < content.length && depth > 0) {
+                    const nextOpen = content.indexOf(openPattern, searchPos);
+                    const nextClose = content.indexOf(closePattern, searchPos);
+                    if (nextClose === -1)
+                        break;
+                    if (nextOpen !== -1 && nextOpen < nextClose) {
+                        // Found nested opening tag
+                        depth++;
+                        searchPos = nextOpen + openPattern.length;
+                    }
+                    else {
+                        // Found closing tag
+                        depth--;
+                        if (depth === 0) {
+                            closeTagStart = nextClose;
+                            break;
+                        }
+                        searchPos = nextClose + closePattern.length;
+                    }
+                }
+                if (closeTagStart !== -1) {
+                    const fullMatch = content.substring(openTagStart, closeTagStart + closePattern.length);
+                    const innerContent = content.substring(openTagEnd + 1, closeTagStart);
+                    const xmlNode = {
+                        id: `${tagPattern}_${openTagStart}_${Date.now()}_${Math.random()}`,
+                        tagName: tagPattern,
+                        start: openTagStart,
+                        end: closeTagStart + closePattern.length,
+                        match: fullMatch,
+                        processor: blockCreator,
+                        groups: [innerContent], // For list processors, group[0] is the inner content
+                        children: [],
+                        depth: 0,
+                        innerContent,
+                        replacement: undefined,
+                        listProcessor
+                    };
+                    allMatches.push(xmlNode);
+                }
+                pos = openTagEnd + 1;
             }
         });
         // Step 2: Catch ANY remaining XML/HTML tags that weren't processed by specific processors
-        // This prevents ANY XML content from falling through to traditional processing
         const genericXmlRegex = /<[^>]+>[\s\S]*?<\/[^>]+>|<[^>]+\/>/gis;
         let genericMatch;
+        const processedRanges = allMatches.map(node => ({ start: node.start, end: node.end }));
         while ((genericMatch = genericXmlRegex.exec(content)) !== null) {
             const matchStart = genericMatch.index;
             const matchEnd = genericMatch.index + genericMatch[0].length;
@@ -792,7 +829,7 @@ class NotionAITool {
                     start: matchStart,
                     end: matchEnd,
                     match: genericMatch[0],
-                    processor: () => null, // Generic processor that just removes the content
+                    processor: () => null,
                     groups: [],
                     children: [],
                     depth: 0,
@@ -801,12 +838,11 @@ class NotionAITool {
                     listProcessor: undefined
                 };
                 allMatches.push(xmlNode);
-                processedRanges.push({ start: matchStart, end: matchEnd });
             }
         }
         // Sort by start position to maintain document order
         allMatches.sort((a, b) => a.start - b.start);
-        // Build parent-child relationships while preserving ordering
+        // Build parent-child relationships
         const rootNodes = [];
         const nodeStack = [];
         for (const node of allMatches) {
@@ -826,7 +862,7 @@ class NotionAITool {
                 // This is a root node
                 rootNodes.push(node);
             }
-            // Only push self-contained tags to stack (not self-closing)
+            // Push to stack for potential children
             if (!node.match.endsWith('/>') && node.match.includes('</')) {
                 nodeStack.push(node);
             }
@@ -1179,23 +1215,9 @@ class NotionAITool {
                     };
                 }
             },
-            // Standalone list items (only if not already processed in lists): <li>content</li>
-            {
-                regex: /<li\s*[^>]*>(.*?)<\/li>/gis,
-                blockCreator: (content) => {
-                    if (content.trim()) {
-                        // Convert HTML to markdown first, then parse to rich text
-                        const markdownContent = NotionAITool.convertInlineHtmlToMarkdown(content.trim());
-                        return {
-                            type: 'bulleted_list_item',
-                            bulleted_list_item: {
-                                rich_text: NotionAITool.parseBasicMarkdown(markdownContent),
-                            },
-                        };
-                    }
-                    return null;
-                }
-            },
+            // REMOVED: Standalone <li> processor
+            // <li> tags should ONLY be processed within <ul>/<ol> contexts via the list processors above
+            // Having a standalone <li> processor causes XML fragments and double processing
             // Line breaks: <br/> or <br>
             {
                 regex: /<br\s*\/?>/gis,
@@ -1477,62 +1499,32 @@ class NotionAITool {
         processed = processed.replace(/\s+/g, ' ').trim();
         return processed;
     }
-    // Helper function to process nested lists and flatten them for Notion
+    // Helper function to process lists using branch-based approach
+    // Each <ul> and <ol> represents a new branch that contains children
     static processNestedList(listContent, listType, blocks) {
         try {
-            // More robust list item extraction that handles nested <li> tags properly
-            const listItems = NotionAITool.extractListItems(listContent);
-            for (const itemContent of listItems) {
-                if (!itemContent.trim())
+            // Process each <li> element as a potential branch point
+            const listItems = NotionAITool.extractListItemsWithBranching(listContent);
+            for (const item of listItems) {
+                if (!item.text && !item.children.length)
                     continue;
-                // Check if this item contains nested lists
-                const hasNestedList = /<[uo]l\s*[^>]*>/i.test(itemContent);
-                if (hasNestedList) {
-                    // Split content into text parts and nested list parts
-                    const parts = itemContent.split(/(<[uo]l\s*[^>]*>[\s\S]*?<\/[uo]l>)/gi);
-                    for (let i = 0; i < parts.length; i++) {
-                        const part = parts[i].trim();
-                        if (!part)
-                            continue;
-                        // Check if this part is a nested list
-                        const isNestedList = /<[uo]l\s*[^>]*>[\s\S]*?<\/[uo]l>/gi.test(part);
-                        if (isNestedList) {
-                            // Process the nested list
-                            const nestedListMatch = part.match(/<([uo]l)\s*[^>]*>([\s\S]*?)<\/\1>/i);
-                            if (nestedListMatch) {
-                                const [, listTag, innerContent] = nestedListMatch;
-                                const nestedListType = listTag === 'ul' ? 'bulleted_list_item' : 'numbered_list_item';
-                                // Recursively process nested list
-                                NotionAITool.processNestedList(innerContent, nestedListType, blocks);
-                            }
-                        }
-                        else {
-                            // This is text content - clean it and add as a list item
-                            // Only process non-empty text parts as separate list items
-                            const cleanContent = NotionAITool.processNestedHtmlInListItem(part);
-                            if (cleanContent) {
-                                blocks.push({
-                                    type: listType,
-                                    [listType]: {
-                                        rich_text: NotionAITool.parseBasicMarkdown(cleanContent),
-                                    },
-                                });
-                            }
-                        }
-                    }
-                }
-                else {
-                    // Simple item without nested lists
-                    const cleanContent = NotionAITool.processNestedHtmlInListItem(itemContent);
-                    if (cleanContent) {
+                // Create list item for the parent text (if any)
+                if (item.text && item.text.trim()) {
+                    const cleanText = NotionAITool.processNestedHtmlInListItem(item.text);
+                    if (cleanText) {
                         blocks.push({
                             type: listType,
                             [listType]: {
-                                rich_text: NotionAITool.parseBasicMarkdown(cleanContent),
+                                rich_text: NotionAITool.parseBasicMarkdown(cleanText),
                             },
                         });
                     }
                 }
+                // Process each child branch
+                for (const child of item.children) {
+                    const childListType = child.type === 'ul' ? 'bulleted_list_item' : 'numbered_list_item';
+                    NotionAITool.processNestedList(child.content, childListType, blocks);
+                }
             }
         }
         catch (error) {
@@ -1546,58 +1538,216 @@ class NotionAITool {
             });
         }
     }
-    // Helper function to properly extract list items handling nested <li> tags
-    static extractListItems(content) {
+    // Extract list items with proper branching structure - only process top-level <li> tags
+    static extractListItemsWithBranching(content) {
         const items = [];
-        let currentPos = 0;
-        while (currentPos < content.length) {
-            // Find the next <li> opening tag
-            const liStart = content.indexOf('<li', currentPos);
+        let pos = 0;
+        while (pos < content.length) {
+            // Find next <li> tag at the current level
+            const liStart = content.indexOf('<li', pos);
             if (liStart === -1)
                 break;
-            // Find the end of the opening tag
-            const openTagEnd = content.indexOf('>', liStart);
-            if (openTagEnd === -1)
+            const liOpenEnd = content.indexOf('>', liStart);
+            if (liOpenEnd === -1)
                 break;
-            // Now find the matching closing </li> tag accounting for nesting
-            let depth = 1;
-            let pos = openTagEnd + 1;
-            let itemEnd = -1;
-            while (pos < content.length && depth > 0) {
-                const nextLiOpen = content.indexOf('<li', pos);
-                const nextLiClose = content.indexOf('</li>', pos);
-                // If no more closing tags, we're done
+            // Find the matching </li> using proper depth tracking for nested tags
+            let depth = 0;
+            let searchPos = liOpenEnd + 1; // Start after the opening <li> tag
+            let liEnd = -1;
+            while (searchPos < content.length) {
+                const nextLiOpen = content.indexOf('<li', searchPos);
+                const nextLiClose = content.indexOf('</li>', searchPos);
+                // Handle case where no more closing tags
                 if (nextLiClose === -1)
                     break;
-                // If there's an opening tag before the next closing tag, increase depth
+                // If there's an opening tag before the next closing tag
                 if (nextLiOpen !== -1 && nextLiOpen < nextLiClose) {
                     depth++;
-                    pos = nextLiOpen + 3; // Move past '<li'
+                    searchPos = nextLiOpen + 3; // Move past '<li'
                 }
                 else {
                     // Found a closing tag
-                    depth--;
                     if (depth === 0) {
-                        itemEnd = nextLiClose + 5; // Include the '</li>'
+                        // This is our matching closing tag
+                        liEnd = nextLiClose;
+                        break;
+                    }
+                    else {
+                        // This closing tag belongs to a nested li
+                        depth--;
+                        searchPos = nextLiClose + 5; // Move past '</li>'
+                    }
+                }
+            }
+            if (liEnd === -1) {
+                // No matching closing tag found
+                pos = liOpenEnd + 1;
+                continue;
+            }
+            // Extract the content between <li> and </li>
+            const fullItemContent = content.substring(liOpenEnd + 1, liEnd);
+            if (!fullItemContent.trim()) {
+                pos = liEnd + 5;
+                continue;
+            }
+            const item = { text: '', children: [] };
+            // Process the content to separate text from nested lists
+            let contentPos = 0;
+            let textParts = [];
+            while (contentPos < fullItemContent.length) {
+                // Look for the next nested list (ul or ol)
+                const nextUlStart = fullItemContent.indexOf('<ul', contentPos);
+                const nextOlStart = fullItemContent.indexOf('<ol', contentPos);
+                let nextListStart = -1;
+                let listType = '';
+                if (nextUlStart !== -1 && (nextOlStart === -1 || nextUlStart < nextOlStart)) {
+                    nextListStart = nextUlStart;
+                    listType = 'ul';
+                }
+                else if (nextOlStart !== -1) {
+                    nextListStart = nextOlStart;
+                    listType = 'ol';
+                }
+                if (nextListStart === -1) {
+                    // No more nested lists - add remaining text
+                    const remainingText = fullItemContent.substring(contentPos);
+                    if (remainingText.trim()) {
+                        textParts.push(remainingText);
+                    }
+                    break;
+                }
+                // Add text before the nested list
+                const textBefore = fullItemContent.substring(contentPos, nextListStart);
+                if (textBefore.trim()) {
+                    textParts.push(textBefore);
+                }
+                // Find the end of this nested list
+                const listOpenEnd = fullItemContent.indexOf('>', nextListStart);
+                if (listOpenEnd === -1) {
+                    // Malformed list tag
+                    textParts.push(fullItemContent.substring(contentPos));
+                    break;
+                }
+                // Track depth to find the matching closing tag
+                let listDepth = 1;
+                let listSearchPos = listOpenEnd + 1;
+                let listEnd = -1;
+                const openTag = `<${listType}`;
+                const closeTag = `</${listType}>`;
+                while (listSearchPos < fullItemContent.length && listDepth > 0) {
+                    const nextListOpen = fullItemContent.indexOf(openTag, listSearchPos);
+                    const nextListClose = fullItemContent.indexOf(closeTag, listSearchPos);
+                    if (nextListClose === -1)
                         break;
+                    if (nextListOpen !== -1 && nextListOpen < nextListClose) {
+                        listDepth++;
+                        listSearchPos = nextListOpen + openTag.length;
                     }
                     else {
-                        pos = nextLiClose + 5; // Move past '</li>'
+                        listDepth--;
+                        if (listDepth === 0) {
+                            listEnd = nextListClose + closeTag.length;
+                            break;
+                        }
+                        listSearchPos = nextListClose + closeTag.length;
                     }
                 }
+                if (listEnd !== -1) {
+                    // Extract the content between <ul>/<ol> and </ul>/<ol>
+                    const listContent = fullItemContent.substring(listOpenEnd + 1, listEnd - closeTag.length);
+                    item.children.push({
+                        type: listType,
+                        content: listContent
+                    });
+                    contentPos = listEnd;
+                }
+                else {
+                    // Malformed nested list - treat remaining as text
+                    textParts.push(fullItemContent.substring(contentPos));
+                    break;
+                }
             }
-            if (itemEnd !== -1) {
-                // Extract the content between <li...> and </li>
-                const fullMatch = content.substring(liStart, itemEnd);
-                const innerMatch = fullMatch.match(/<li[^>]*>([\s\S]*)<\/li>$/);
-                if (innerMatch) {
-                    items.push(innerMatch[1]);
+            // Combine all text parts and clean them
+            if (textParts.length > 0) {
+                const combinedText = textParts.join(' ').trim();
+                const cleanText = NotionAITool.processNestedHtmlInListItem(combinedText);
+                if (cleanText) {
+                    item.text = cleanText;
                 }
-                currentPos = itemEnd;
             }
-            else {
-                // Malformed HTML, skip this tag
-                currentPos = openTagEnd + 1;
+            // Only add items that have either text or children
+            if (item.text.trim() || item.children.length > 0) {
+                items.push(item);
+            }
+            pos = liEnd + 5; // Move past </li>
+        }
+        return items;
+    }
+    // Helper function to properly extract list items handling nested <li> tags
+    static extractListItems(content) {
+        const items = [];
+        // Use a more robust regex approach that respects nesting
+        // This regex captures the complete <li>...</li> blocks including nested content
+        const liRegex = /<li[^>]*>((?:[^<]|<(?!\/li>))*?(?:<[uo]l[^>]*>[\s\S]*?<\/[uo]l>(?:[^<]|<(?!\/li>))*?)*?)<\/li>/gi;
+        let match;
+        while ((match = liRegex.exec(content)) !== null) {
+            const itemContent = match[1];
+            if (itemContent && itemContent.trim()) {
+                items.push(itemContent.trim());
+            }
+        }
+        // Fallback to the old depth-tracking method if regex fails
+        if (items.length === 0) {
+            let currentPos = 0;
+            while (currentPos < content.length) {
+                // Find the next <li> opening tag
+                const liStart = content.indexOf('<li', currentPos);
+                if (liStart === -1)
+                    break;
+                // Find the end of the opening tag
+                const openTagEnd = content.indexOf('>', liStart);
+                if (openTagEnd === -1)
+                    break;
+                // Now find the matching closing </li> tag accounting for nesting
+                let depth = 1;
+                let pos = openTagEnd + 1;
+                let itemEnd = -1;
+                while (pos < content.length && depth > 0) {
+                    const nextLiOpen = content.indexOf('<li', pos);
+                    const nextLiClose = content.indexOf('</li>', pos);
+                    // If no more closing tags, we're done
+                    if (nextLiClose === -1)
+                        break;
+                    // If there's an opening tag before the next closing tag, increase depth
+                    if (nextLiOpen !== -1 && nextLiOpen < nextLiClose) {
+                        depth++;
+                        pos = nextLiOpen + 3; // Move past '<li'
+                    }
+                    else {
+                        // Found a closing tag
+                        depth--;
+                        if (depth === 0) {
+                            itemEnd = nextLiClose + 5; // Include the '</li>'
+                            break;
+                        }
+                        else {
+                            pos = nextLiClose + 5; // Move past '</li>'
+                        }
+                    }
+                }
+                if (itemEnd !== -1) {
+                    // Extract the content between <li...> and </li>
+                    const fullMatch = content.substring(liStart, itemEnd);
+                    const innerMatch = fullMatch.match(/<li[^>]*>([\s\S]*)<\/li>$/);
+                    if (innerMatch) {
+                        items.push(innerMatch[1]);
+                    }
+                    currentPos = itemEnd;
+                }
+                else {
+                    // Malformed HTML, skip this tag
+                    currentPos = openTagEnd + 1;
+                }
             }
         }
         return items;

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "n8n-nodes-notion-advanced",
-  "version": "1.2.28-beta",
+  "version": "1.2.29-beta",
   "description": "Advanced n8n Notion nodes: Full-featured workflow node + AI Agent Tool for intelligent Notion automation with 25+ block types (BETA)",
   "scripts": {},
   "files": [