n8n-nodes-notion-advanced 1.2.28-beta → 1.2.30-beta

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -55,6 +55,13 @@ export declare class NotionAITool implements INodeType {
55
55
  static processNestedHtmlInListItem(content: string): string;
56
56
  static convertInlineHtmlToMarkdown(content: string): string;
57
57
  static processNestedList(listContent: string, listType: 'bulleted_list_item' | 'numbered_list_item', blocks: IDataObject[]): void;
58
+ static extractListItemsWithBranching(content: string): Array<{
59
+ text: string;
60
+ children: Array<{
61
+ type: string;
62
+ content: string;
63
+ }>;
64
+ }>;
58
65
  static extractListItems(content: string): string[];
59
66
  static getCalloutEmoji(type: string): string;
60
67
  static getCalloutColor(type: string): string;
@@ -745,40 +745,77 @@ class NotionAITool {
745
745
  return charIndex;
746
746
  }
747
747
  }
748
- // Enhanced hierarchical XML tree structure that catches ALL XML content
748
+ // Enhanced hierarchical XML tree structure using depth-aware parsing
749
749
  static buildXMLTree(content, tagProcessors) {
750
750
  var _a;
751
751
  const allMatches = [];
752
- const processedRanges = [];
753
- // Step 1: Collect all XML tags with specific processors
752
+ // Step 1: Use depth-aware parsing for each tag processor
754
753
  tagProcessors.forEach(({ regex, blockCreator, listProcessor }) => {
755
754
  var _a;
756
- const globalRegex = new RegExp(regex.source, 'gis');
757
- let match;
758
- while ((match = globalRegex.exec(content)) !== null) {
759
- const tagName = ((_a = match[0].match(/<(\w+)/)) === null || _a === void 0 ? void 0 : _a[1]) || 'unknown';
760
- const xmlNode = {
761
- id: `${tagName}_${match.index}_${Date.now()}_${Math.random()}`,
762
- tagName,
763
- start: match.index,
764
- end: match.index + match[0].length,
765
- match: match[0],
766
- processor: blockCreator,
767
- groups: match.slice(1),
768
- children: [],
769
- depth: 0,
770
- innerContent: match[0],
771
- replacement: undefined,
772
- listProcessor
773
- };
774
- allMatches.push(xmlNode);
775
- processedRanges.push({ start: xmlNode.start, end: xmlNode.end });
755
+ const tagPattern = (_a = regex.source.match(/<(\w+)/)) === null || _a === void 0 ? void 0 : _a[1];
756
+ if (!tagPattern)
757
+ return;
758
+ // Find all opening tags of this type
759
+ let pos = 0;
760
+ while (pos < content.length) {
761
+ const openTagStart = content.indexOf(`<${tagPattern}`, pos);
762
+ if (openTagStart === -1)
763
+ break;
764
+ const openTagEnd = content.indexOf('>', openTagStart);
765
+ if (openTagEnd === -1)
766
+ break;
767
+ // Find matching closing tag using depth tracking
768
+ let depth = 1;
769
+ let searchPos = openTagEnd + 1;
770
+ let closeTagStart = -1;
771
+ const openPattern = `<${tagPattern}`;
772
+ const closePattern = `</${tagPattern}>`;
773
+ while (searchPos < content.length && depth > 0) {
774
+ const nextOpen = content.indexOf(openPattern, searchPos);
775
+ const nextClose = content.indexOf(closePattern, searchPos);
776
+ if (nextClose === -1)
777
+ break;
778
+ if (nextOpen !== -1 && nextOpen < nextClose) {
779
+ // Found nested opening tag
780
+ depth++;
781
+ searchPos = nextOpen + openPattern.length;
782
+ }
783
+ else {
784
+ // Found closing tag
785
+ depth--;
786
+ if (depth === 0) {
787
+ closeTagStart = nextClose;
788
+ break;
789
+ }
790
+ searchPos = nextClose + closePattern.length;
791
+ }
792
+ }
793
+ if (closeTagStart !== -1) {
794
+ const fullMatch = content.substring(openTagStart, closeTagStart + closePattern.length);
795
+ const innerContent = content.substring(openTagEnd + 1, closeTagStart);
796
+ const xmlNode = {
797
+ id: `${tagPattern}_${openTagStart}_${Date.now()}_${Math.random()}`,
798
+ tagName: tagPattern,
799
+ start: openTagStart,
800
+ end: closeTagStart + closePattern.length,
801
+ match: fullMatch,
802
+ processor: blockCreator,
803
+ groups: [innerContent], // For list processors, group[0] is the inner content
804
+ children: [],
805
+ depth: 0,
806
+ innerContent,
807
+ replacement: undefined,
808
+ listProcessor
809
+ };
810
+ allMatches.push(xmlNode);
811
+ }
812
+ pos = openTagEnd + 1;
776
813
  }
777
814
  });
778
815
  // Step 2: Catch ANY remaining XML/HTML tags that weren't processed by specific processors
779
- // This prevents ANY XML content from falling through to traditional processing
780
816
  const genericXmlRegex = /<[^>]+>[\s\S]*?<\/[^>]+>|<[^>]+\/>/gis;
781
817
  let genericMatch;
818
+ const processedRanges = allMatches.map(node => ({ start: node.start, end: node.end }));
782
819
  while ((genericMatch = genericXmlRegex.exec(content)) !== null) {
783
820
  const matchStart = genericMatch.index;
784
821
  const matchEnd = genericMatch.index + genericMatch[0].length;
@@ -792,7 +829,7 @@ class NotionAITool {
792
829
  start: matchStart,
793
830
  end: matchEnd,
794
831
  match: genericMatch[0],
795
- processor: () => null, // Generic processor that just removes the content
832
+ processor: () => null,
796
833
  groups: [],
797
834
  children: [],
798
835
  depth: 0,
@@ -801,12 +838,11 @@ class NotionAITool {
801
838
  listProcessor: undefined
802
839
  };
803
840
  allMatches.push(xmlNode);
804
- processedRanges.push({ start: matchStart, end: matchEnd });
805
841
  }
806
842
  }
807
843
  // Sort by start position to maintain document order
808
844
  allMatches.sort((a, b) => a.start - b.start);
809
- // Build parent-child relationships while preserving ordering
845
+ // Build parent-child relationships
810
846
  const rootNodes = [];
811
847
  const nodeStack = [];
812
848
  for (const node of allMatches) {
@@ -826,7 +862,7 @@ class NotionAITool {
826
862
  // This is a root node
827
863
  rootNodes.push(node);
828
864
  }
829
- // Only push self-contained tags to stack (not self-closing)
865
+ // Push to stack for potential children
830
866
  if (!node.match.endsWith('/>') && node.match.includes('</')) {
831
867
  nodeStack.push(node);
832
868
  }
@@ -1179,23 +1215,9 @@ class NotionAITool {
1179
1215
  };
1180
1216
  }
1181
1217
  },
1182
- // Standalone list items (only if not already processed in lists): <li>content</li>
1183
- {
1184
- regex: /<li\s*[^>]*>(.*?)<\/li>/gis,
1185
- blockCreator: (content) => {
1186
- if (content.trim()) {
1187
- // Convert HTML to markdown first, then parse to rich text
1188
- const markdownContent = NotionAITool.convertInlineHtmlToMarkdown(content.trim());
1189
- return {
1190
- type: 'bulleted_list_item',
1191
- bulleted_list_item: {
1192
- rich_text: NotionAITool.parseBasicMarkdown(markdownContent),
1193
- },
1194
- };
1195
- }
1196
- return null;
1197
- }
1198
- },
1218
+ // REMOVED: Standalone <li> processor
1219
+ // <li> tags should ONLY be processed within <ul>/<ol> contexts via the list processors above
1220
+ // Having a standalone <li> processor causes XML fragments and double processing
1199
1221
  // Line breaks: <br/> or <br>
1200
1222
  {
1201
1223
  regex: /<br\s*\/?>/gis,
@@ -1477,61 +1499,45 @@ class NotionAITool {
1477
1499
  processed = processed.replace(/\s+/g, ' ').trim();
1478
1500
  return processed;
1479
1501
  }
1480
- // Helper function to process nested lists and flatten them for Notion
1502
+ // Helper function to process lists using branch-based approach
1503
+ // Each <ul> and <ol> represents a new branch that contains children
1481
1504
  static processNestedList(listContent, listType, blocks) {
1482
1505
  try {
1483
- // More robust list item extraction that handles nested <li> tags properly
1484
- const listItems = NotionAITool.extractListItems(listContent);
1485
- for (const itemContent of listItems) {
1486
- if (!itemContent.trim())
1506
+ // Process each <li> element as a potential branch point
1507
+ const listItems = NotionAITool.extractListItemsWithBranching(listContent);
1508
+ for (const item of listItems) {
1509
+ if (!item.text && !item.children.length)
1487
1510
  continue;
1488
- // Check if this item contains nested lists
1489
- const hasNestedList = /<[uo]l\s*[^>]*>/i.test(itemContent);
1490
- if (hasNestedList) {
1491
- // Split content into text parts and nested list parts
1492
- const parts = itemContent.split(/(<[uo]l\s*[^>]*>[\s\S]*?<\/[uo]l>)/gi);
1493
- for (let i = 0; i < parts.length; i++) {
1494
- const part = parts[i].trim();
1495
- if (!part)
1496
- continue;
1497
- // Check if this part is a nested list
1498
- const isNestedList = /<[uo]l\s*[^>]*>[\s\S]*?<\/[uo]l>/gi.test(part);
1499
- if (isNestedList) {
1500
- // Process the nested list
1501
- const nestedListMatch = part.match(/<([uo]l)\s*[^>]*>([\s\S]*?)<\/\1>/i);
1502
- if (nestedListMatch) {
1503
- const [, listTag, innerContent] = nestedListMatch;
1504
- const nestedListType = listTag === 'ul' ? 'bulleted_list_item' : 'numbered_list_item';
1505
- // Recursively process nested list
1506
- NotionAITool.processNestedList(innerContent, nestedListType, blocks);
1507
- }
1508
- }
1509
- else {
1510
- // This is text content - clean it and add as a list item
1511
- // Only process non-empty text parts as separate list items
1512
- const cleanContent = NotionAITool.processNestedHtmlInListItem(part);
1513
- if (cleanContent) {
1514
- blocks.push({
1515
- type: listType,
1516
- [listType]: {
1517
- rich_text: NotionAITool.parseBasicMarkdown(cleanContent),
1518
- },
1519
- });
1520
- }
1521
- }
1511
+ // Create list item block
1512
+ const listItemBlock = {
1513
+ type: listType,
1514
+ [listType]: {
1515
+ rich_text: [],
1516
+ },
1517
+ };
1518
+ // Add parent text if present
1519
+ if (item.text && item.text.trim()) {
1520
+ const cleanText = NotionAITool.processNestedHtmlInListItem(item.text);
1521
+ if (cleanText) {
1522
+ listItemBlock[listType].rich_text = NotionAITool.parseBasicMarkdown(cleanText);
1522
1523
  }
1523
1524
  }
1524
- else {
1525
- // Simple item without nested lists
1526
- const cleanContent = NotionAITool.processNestedHtmlInListItem(itemContent);
1527
- if (cleanContent) {
1528
- blocks.push({
1529
- type: listType,
1530
- [listType]: {
1531
- rich_text: NotionAITool.parseBasicMarkdown(cleanContent),
1532
- },
1533
- });
1525
+ // Process child branches and add them as nested children
1526
+ if (item.children.length > 0) {
1527
+ const childBlocks = [];
1528
+ for (const child of item.children) {
1529
+ const childListType = child.type === 'ul' ? 'bulleted_list_item' : 'numbered_list_item';
1530
+ NotionAITool.processNestedList(child.content, childListType, childBlocks);
1534
1531
  }
1532
+ // Add children to the parent block
1533
+ if (childBlocks.length > 0) {
1534
+ listItemBlock[listType].children = childBlocks;
1535
+ }
1536
+ }
1537
+ // Only add the block if it has text or children
1538
+ const listData = listItemBlock[listType];
1539
+ if ((listData.rich_text && listData.rich_text.length > 0) || listData.children) {
1540
+ blocks.push(listItemBlock);
1535
1541
  }
1536
1542
  }
1537
1543
  }
@@ -1546,58 +1552,216 @@ class NotionAITool {
1546
1552
  });
1547
1553
  }
1548
1554
  }
1549
- // Helper function to properly extract list items handling nested <li> tags
1550
- static extractListItems(content) {
1555
+ // Extract list items with proper branching structure - only process top-level <li> tags
1556
+ static extractListItemsWithBranching(content) {
1551
1557
  const items = [];
1552
- let currentPos = 0;
1553
- while (currentPos < content.length) {
1554
- // Find the next <li> opening tag
1555
- const liStart = content.indexOf('<li', currentPos);
1558
+ let pos = 0;
1559
+ while (pos < content.length) {
1560
+ // Find next <li> tag at the current level
1561
+ const liStart = content.indexOf('<li', pos);
1556
1562
  if (liStart === -1)
1557
1563
  break;
1558
- // Find the end of the opening tag
1559
- const openTagEnd = content.indexOf('>', liStart);
1560
- if (openTagEnd === -1)
1564
+ const liOpenEnd = content.indexOf('>', liStart);
1565
+ if (liOpenEnd === -1)
1561
1566
  break;
1562
- // Now find the matching closing </li> tag accounting for nesting
1563
- let depth = 1;
1564
- let pos = openTagEnd + 1;
1565
- let itemEnd = -1;
1566
- while (pos < content.length && depth > 0) {
1567
- const nextLiOpen = content.indexOf('<li', pos);
1568
- const nextLiClose = content.indexOf('</li>', pos);
1569
- // If no more closing tags, we're done
1567
+ // Find the matching </li> using proper depth tracking for nested tags
1568
+ let depth = 0;
1569
+ let searchPos = liOpenEnd + 1; // Start after the opening <li> tag
1570
+ let liEnd = -1;
1571
+ while (searchPos < content.length) {
1572
+ const nextLiOpen = content.indexOf('<li', searchPos);
1573
+ const nextLiClose = content.indexOf('</li>', searchPos);
1574
+ // Handle case where no more closing tags
1570
1575
  if (nextLiClose === -1)
1571
1576
  break;
1572
- // If there's an opening tag before the next closing tag, increase depth
1577
+ // If there's an opening tag before the next closing tag
1573
1578
  if (nextLiOpen !== -1 && nextLiOpen < nextLiClose) {
1574
1579
  depth++;
1575
- pos = nextLiOpen + 3; // Move past '<li'
1580
+ searchPos = nextLiOpen + 3; // Move past '<li'
1576
1581
  }
1577
1582
  else {
1578
1583
  // Found a closing tag
1579
- depth--;
1580
1584
  if (depth === 0) {
1581
- itemEnd = nextLiClose + 5; // Include the '</li>'
1585
+ // This is our matching closing tag
1586
+ liEnd = nextLiClose;
1587
+ break;
1588
+ }
1589
+ else {
1590
+ // This closing tag belongs to a nested li
1591
+ depth--;
1592
+ searchPos = nextLiClose + 5; // Move past '</li>'
1593
+ }
1594
+ }
1595
+ }
1596
+ if (liEnd === -1) {
1597
+ // No matching closing tag found
1598
+ pos = liOpenEnd + 1;
1599
+ continue;
1600
+ }
1601
+ // Extract the content between <li> and </li>
1602
+ const fullItemContent = content.substring(liOpenEnd + 1, liEnd);
1603
+ if (!fullItemContent.trim()) {
1604
+ pos = liEnd + 5;
1605
+ continue;
1606
+ }
1607
+ const item = { text: '', children: [] };
1608
+ // Process the content to separate text from nested lists
1609
+ let contentPos = 0;
1610
+ let textParts = [];
1611
+ while (contentPos < fullItemContent.length) {
1612
+ // Look for the next nested list (ul or ol)
1613
+ const nextUlStart = fullItemContent.indexOf('<ul', contentPos);
1614
+ const nextOlStart = fullItemContent.indexOf('<ol', contentPos);
1615
+ let nextListStart = -1;
1616
+ let listType = '';
1617
+ if (nextUlStart !== -1 && (nextOlStart === -1 || nextUlStart < nextOlStart)) {
1618
+ nextListStart = nextUlStart;
1619
+ listType = 'ul';
1620
+ }
1621
+ else if (nextOlStart !== -1) {
1622
+ nextListStart = nextOlStart;
1623
+ listType = 'ol';
1624
+ }
1625
+ if (nextListStart === -1) {
1626
+ // No more nested lists - add remaining text
1627
+ const remainingText = fullItemContent.substring(contentPos);
1628
+ if (remainingText.trim()) {
1629
+ textParts.push(remainingText);
1630
+ }
1631
+ break;
1632
+ }
1633
+ // Add text before the nested list
1634
+ const textBefore = fullItemContent.substring(contentPos, nextListStart);
1635
+ if (textBefore.trim()) {
1636
+ textParts.push(textBefore);
1637
+ }
1638
+ // Find the end of this nested list
1639
+ const listOpenEnd = fullItemContent.indexOf('>', nextListStart);
1640
+ if (listOpenEnd === -1) {
1641
+ // Malformed list tag
1642
+ textParts.push(fullItemContent.substring(contentPos));
1643
+ break;
1644
+ }
1645
+ // Track depth to find the matching closing tag
1646
+ let listDepth = 1;
1647
+ let listSearchPos = listOpenEnd + 1;
1648
+ let listEnd = -1;
1649
+ const openTag = `<${listType}`;
1650
+ const closeTag = `</${listType}>`;
1651
+ while (listSearchPos < fullItemContent.length && listDepth > 0) {
1652
+ const nextListOpen = fullItemContent.indexOf(openTag, listSearchPos);
1653
+ const nextListClose = fullItemContent.indexOf(closeTag, listSearchPos);
1654
+ if (nextListClose === -1)
1582
1655
  break;
1656
+ if (nextListOpen !== -1 && nextListOpen < nextListClose) {
1657
+ listDepth++;
1658
+ listSearchPos = nextListOpen + openTag.length;
1583
1659
  }
1584
1660
  else {
1585
- pos = nextLiClose + 5; // Move past '</li>'
1661
+ listDepth--;
1662
+ if (listDepth === 0) {
1663
+ listEnd = nextListClose + closeTag.length;
1664
+ break;
1665
+ }
1666
+ listSearchPos = nextListClose + closeTag.length;
1586
1667
  }
1587
1668
  }
1669
+ if (listEnd !== -1) {
1670
+ // Extract the content between <ul>/<ol> and </ul>/</ol>
1671
+ const listContent = fullItemContent.substring(listOpenEnd + 1, listEnd - closeTag.length);
1672
+ item.children.push({
1673
+ type: listType,
1674
+ content: listContent
1675
+ });
1676
+ contentPos = listEnd;
1677
+ }
1678
+ else {
1679
+ // Malformed nested list - treat remaining as text
1680
+ textParts.push(fullItemContent.substring(contentPos));
1681
+ break;
1682
+ }
1588
1683
  }
1589
- if (itemEnd !== -1) {
1590
- // Extract the content between <li...> and </li>
1591
- const fullMatch = content.substring(liStart, itemEnd);
1592
- const innerMatch = fullMatch.match(/<li[^>]*>([\s\S]*)<\/li>$/);
1593
- if (innerMatch) {
1594
- items.push(innerMatch[1]);
1684
+ // Combine all text parts and clean them
1685
+ if (textParts.length > 0) {
1686
+ const combinedText = textParts.join(' ').trim();
1687
+ const cleanText = NotionAITool.processNestedHtmlInListItem(combinedText);
1688
+ if (cleanText) {
1689
+ item.text = cleanText;
1595
1690
  }
1596
- currentPos = itemEnd;
1597
1691
  }
1598
- else {
1599
- // Malformed HTML, skip this tag
1600
- currentPos = openTagEnd + 1;
1692
+ // Only add items that have either text or children
1693
+ if (item.text.trim() || item.children.length > 0) {
1694
+ items.push(item);
1695
+ }
1696
+ pos = liEnd + 5; // Move past </li>
1697
+ }
1698
+ return items;
1699
+ }
1700
+ // Helper function to properly extract list items handling nested <li> tags
1701
+ static extractListItems(content) {
1702
+ const items = [];
1703
+ // Use a more robust regex approach that respects nesting
1704
+ // This regex captures the complete <li>...</li> blocks including nested content
1705
+ const liRegex = /<li[^>]*>((?:[^<]|<(?!\/li>))*?(?:<[uo]l[^>]*>[\s\S]*?<\/[uo]l>(?:[^<]|<(?!\/li>))*?)*?)<\/li>/gi;
1706
+ let match;
1707
+ while ((match = liRegex.exec(content)) !== null) {
1708
+ const itemContent = match[1];
1709
+ if (itemContent && itemContent.trim()) {
1710
+ items.push(itemContent.trim());
1711
+ }
1712
+ }
1713
+ // Fall back to the old depth-tracking method if the regex pass produced no items
1714
+ if (items.length === 0) {
1715
+ let currentPos = 0;
1716
+ while (currentPos < content.length) {
1717
+ // Find the next <li> opening tag
1718
+ const liStart = content.indexOf('<li', currentPos);
1719
+ if (liStart === -1)
1720
+ break;
1721
+ // Find the end of the opening tag
1722
+ const openTagEnd = content.indexOf('>', liStart);
1723
+ if (openTagEnd === -1)
1724
+ break;
1725
+ // Now find the matching closing </li> tag accounting for nesting
1726
+ let depth = 1;
1727
+ let pos = openTagEnd + 1;
1728
+ let itemEnd = -1;
1729
+ while (pos < content.length && depth > 0) {
1730
+ const nextLiOpen = content.indexOf('<li', pos);
1731
+ const nextLiClose = content.indexOf('</li>', pos);
1732
+ // If no more closing tags, we're done
1733
+ if (nextLiClose === -1)
1734
+ break;
1735
+ // If there's an opening tag before the next closing tag, increase depth
1736
+ if (nextLiOpen !== -1 && nextLiOpen < nextLiClose) {
1737
+ depth++;
1738
+ pos = nextLiOpen + 3; // Move past '<li'
1739
+ }
1740
+ else {
1741
+ // Found a closing tag
1742
+ depth--;
1743
+ if (depth === 0) {
1744
+ itemEnd = nextLiClose + 5; // Include the '</li>'
1745
+ break;
1746
+ }
1747
+ else {
1748
+ pos = nextLiClose + 5; // Move past '</li>'
1749
+ }
1750
+ }
1751
+ }
1752
+ if (itemEnd !== -1) {
1753
+ // Extract the content between <li...> and </li>
1754
+ const fullMatch = content.substring(liStart, itemEnd);
1755
+ const innerMatch = fullMatch.match(/<li[^>]*>([\s\S]*)<\/li>$/);
1756
+ if (innerMatch) {
1757
+ items.push(innerMatch[1]);
1758
+ }
1759
+ currentPos = itemEnd;
1760
+ }
1761
+ else {
1762
+ // Malformed HTML, skip this tag
1763
+ currentPos = openTagEnd + 1;
1764
+ }
1601
1765
  }
1602
1766
  }
1603
1767
  return items;
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "n8n-nodes-notion-advanced",
3
- "version": "1.2.28-beta",
3
+ "version": "1.2.30-beta",
4
4
  "description": "Advanced n8n Notion nodes: Full-featured workflow node + AI Agent Tool for intelligent Notion automation with 25+ block types (BETA)",
5
5
  "scripts": {},
6
6
  "files": [