n8n-nodes-notion-advanced 1.2.28-beta → 1.2.29-beta

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -55,6 +55,13 @@ export declare class NotionAITool implements INodeType {
55
55
  static processNestedHtmlInListItem(content: string): string;
56
56
  static convertInlineHtmlToMarkdown(content: string): string;
57
57
  static processNestedList(listContent: string, listType: 'bulleted_list_item' | 'numbered_list_item', blocks: IDataObject[]): void;
58
+ static extractListItemsWithBranching(content: string): Array<{
59
+ text: string;
60
+ children: Array<{
61
+ type: string;
62
+ content: string;
63
+ }>;
64
+ }>;
58
65
  static extractListItems(content: string): string[];
59
66
  static getCalloutEmoji(type: string): string;
60
67
  static getCalloutColor(type: string): string;
@@ -745,40 +745,77 @@ class NotionAITool {
745
745
  return charIndex;
746
746
  }
747
747
  }
748
- // Enhanced hierarchical XML tree structure that catches ALL XML content
748
+ // Enhanced hierarchical XML tree structure using depth-aware parsing
749
749
  static buildXMLTree(content, tagProcessors) {
750
750
  var _a;
751
751
  const allMatches = [];
752
- const processedRanges = [];
753
- // Step 1: Collect all XML tags with specific processors
752
+ // Step 1: Use depth-aware parsing for each tag processor
754
753
  tagProcessors.forEach(({ regex, blockCreator, listProcessor }) => {
755
754
  var _a;
756
- const globalRegex = new RegExp(regex.source, 'gis');
757
- let match;
758
- while ((match = globalRegex.exec(content)) !== null) {
759
- const tagName = ((_a = match[0].match(/<(\w+)/)) === null || _a === void 0 ? void 0 : _a[1]) || 'unknown';
760
- const xmlNode = {
761
- id: `${tagName}_${match.index}_${Date.now()}_${Math.random()}`,
762
- tagName,
763
- start: match.index,
764
- end: match.index + match[0].length,
765
- match: match[0],
766
- processor: blockCreator,
767
- groups: match.slice(1),
768
- children: [],
769
- depth: 0,
770
- innerContent: match[0],
771
- replacement: undefined,
772
- listProcessor
773
- };
774
- allMatches.push(xmlNode);
775
- processedRanges.push({ start: xmlNode.start, end: xmlNode.end });
755
+ const tagPattern = (_a = regex.source.match(/<(\w+)/)) === null || _a === void 0 ? void 0 : _a[1];
756
+ if (!tagPattern)
757
+ return;
758
+ // Find all opening tags of this type
759
+ let pos = 0;
760
+ while (pos < content.length) {
761
+ const openTagStart = content.indexOf(`<${tagPattern}`, pos);
762
+ if (openTagStart === -1)
763
+ break;
764
+ const openTagEnd = content.indexOf('>', openTagStart);
765
+ if (openTagEnd === -1)
766
+ break;
767
+ // Find matching closing tag using depth tracking
768
+ let depth = 1;
769
+ let searchPos = openTagEnd + 1;
770
+ let closeTagStart = -1;
771
+ const openPattern = `<${tagPattern}`;
772
+ const closePattern = `</${tagPattern}>`;
773
+ while (searchPos < content.length && depth > 0) {
774
+ const nextOpen = content.indexOf(openPattern, searchPos);
775
+ const nextClose = content.indexOf(closePattern, searchPos);
776
+ if (nextClose === -1)
777
+ break;
778
+ if (nextOpen !== -1 && nextOpen < nextClose) {
779
+ // Found nested opening tag
780
+ depth++;
781
+ searchPos = nextOpen + openPattern.length;
782
+ }
783
+ else {
784
+ // Found closing tag
785
+ depth--;
786
+ if (depth === 0) {
787
+ closeTagStart = nextClose;
788
+ break;
789
+ }
790
+ searchPos = nextClose + closePattern.length;
791
+ }
792
+ }
793
+ if (closeTagStart !== -1) {
794
+ const fullMatch = content.substring(openTagStart, closeTagStart + closePattern.length);
795
+ const innerContent = content.substring(openTagEnd + 1, closeTagStart);
796
+ const xmlNode = {
797
+ id: `${tagPattern}_${openTagStart}_${Date.now()}_${Math.random()}`,
798
+ tagName: tagPattern,
799
+ start: openTagStart,
800
+ end: closeTagStart + closePattern.length,
801
+ match: fullMatch,
802
+ processor: blockCreator,
803
+ groups: [innerContent], // For list processors, group[0] is the inner content
804
+ children: [],
805
+ depth: 0,
806
+ innerContent,
807
+ replacement: undefined,
808
+ listProcessor
809
+ };
810
+ allMatches.push(xmlNode);
811
+ }
812
+ pos = openTagEnd + 1;
776
813
  }
777
814
  });
778
815
  // Step 2: Catch ANY remaining XML/HTML tags that weren't processed by specific processors
779
- // This prevents ANY XML content from falling through to traditional processing
780
816
  const genericXmlRegex = /<[^>]+>[\s\S]*?<\/[^>]+>|<[^>]+\/>/gis;
781
817
  let genericMatch;
818
+ const processedRanges = allMatches.map(node => ({ start: node.start, end: node.end }));
782
819
  while ((genericMatch = genericXmlRegex.exec(content)) !== null) {
783
820
  const matchStart = genericMatch.index;
784
821
  const matchEnd = genericMatch.index + genericMatch[0].length;
@@ -792,7 +829,7 @@ class NotionAITool {
792
829
  start: matchStart,
793
830
  end: matchEnd,
794
831
  match: genericMatch[0],
795
- processor: () => null, // Generic processor that just removes the content
832
+ processor: () => null,
796
833
  groups: [],
797
834
  children: [],
798
835
  depth: 0,
@@ -801,12 +838,11 @@ class NotionAITool {
801
838
  listProcessor: undefined
802
839
  };
803
840
  allMatches.push(xmlNode);
804
- processedRanges.push({ start: matchStart, end: matchEnd });
805
841
  }
806
842
  }
807
843
  // Sort by start position to maintain document order
808
844
  allMatches.sort((a, b) => a.start - b.start);
809
- // Build parent-child relationships while preserving ordering
845
+ // Build parent-child relationships
810
846
  const rootNodes = [];
811
847
  const nodeStack = [];
812
848
  for (const node of allMatches) {
@@ -826,7 +862,7 @@ class NotionAITool {
826
862
  // This is a root node
827
863
  rootNodes.push(node);
828
864
  }
829
- // Only push self-contained tags to stack (not self-closing)
865
+ // Push to stack for potential children
830
866
  if (!node.match.endsWith('/>') && node.match.includes('</')) {
831
867
  nodeStack.push(node);
832
868
  }
@@ -1179,23 +1215,9 @@ class NotionAITool {
1179
1215
  };
1180
1216
  }
1181
1217
  },
1182
- // Standalone list items (only if not already processed in lists): <li>content</li>
1183
- {
1184
- regex: /<li\s*[^>]*>(.*?)<\/li>/gis,
1185
- blockCreator: (content) => {
1186
- if (content.trim()) {
1187
- // Convert HTML to markdown first, then parse to rich text
1188
- const markdownContent = NotionAITool.convertInlineHtmlToMarkdown(content.trim());
1189
- return {
1190
- type: 'bulleted_list_item',
1191
- bulleted_list_item: {
1192
- rich_text: NotionAITool.parseBasicMarkdown(markdownContent),
1193
- },
1194
- };
1195
- }
1196
- return null;
1197
- }
1198
- },
1218
+ // REMOVED: Standalone <li> processor
1219
+ // <li> tags should ONLY be processed within <ul>/<ol> contexts via the list processors above
1220
+ // Having a standalone <li> processor causes XML fragments and double processing
1199
1221
  // Line breaks: <br/> or <br>
1200
1222
  {
1201
1223
  regex: /<br\s*\/?>/gis,
@@ -1477,62 +1499,32 @@ class NotionAITool {
1477
1499
  processed = processed.replace(/\s+/g, ' ').trim();
1478
1500
  return processed;
1479
1501
  }
1480
- // Helper function to process nested lists and flatten them for Notion
1502
+ // Helper function to process lists using branch-based approach
1503
+ // Each <ul> and <ol> represents a new branch that contains children
1481
1504
  static processNestedList(listContent, listType, blocks) {
1482
1505
  try {
1483
- // More robust list item extraction that handles nested <li> tags properly
1484
- const listItems = NotionAITool.extractListItems(listContent);
1485
- for (const itemContent of listItems) {
1486
- if (!itemContent.trim())
1506
+ // Process each <li> element as a potential branch point
1507
+ const listItems = NotionAITool.extractListItemsWithBranching(listContent);
1508
+ for (const item of listItems) {
1509
+ if (!item.text && !item.children.length)
1487
1510
  continue;
1488
- // Check if this item contains nested lists
1489
- const hasNestedList = /<[uo]l\s*[^>]*>/i.test(itemContent);
1490
- if (hasNestedList) {
1491
- // Split content into text parts and nested list parts
1492
- const parts = itemContent.split(/(<[uo]l\s*[^>]*>[\s\S]*?<\/[uo]l>)/gi);
1493
- for (let i = 0; i < parts.length; i++) {
1494
- const part = parts[i].trim();
1495
- if (!part)
1496
- continue;
1497
- // Check if this part is a nested list
1498
- const isNestedList = /<[uo]l\s*[^>]*>[\s\S]*?<\/[uo]l>/gi.test(part);
1499
- if (isNestedList) {
1500
- // Process the nested list
1501
- const nestedListMatch = part.match(/<([uo]l)\s*[^>]*>([\s\S]*?)<\/\1>/i);
1502
- if (nestedListMatch) {
1503
- const [, listTag, innerContent] = nestedListMatch;
1504
- const nestedListType = listTag === 'ul' ? 'bulleted_list_item' : 'numbered_list_item';
1505
- // Recursively process nested list
1506
- NotionAITool.processNestedList(innerContent, nestedListType, blocks);
1507
- }
1508
- }
1509
- else {
1510
- // This is text content - clean it and add as a list item
1511
- // Only process non-empty text parts as separate list items
1512
- const cleanContent = NotionAITool.processNestedHtmlInListItem(part);
1513
- if (cleanContent) {
1514
- blocks.push({
1515
- type: listType,
1516
- [listType]: {
1517
- rich_text: NotionAITool.parseBasicMarkdown(cleanContent),
1518
- },
1519
- });
1520
- }
1521
- }
1522
- }
1523
- }
1524
- else {
1525
- // Simple item without nested lists
1526
- const cleanContent = NotionAITool.processNestedHtmlInListItem(itemContent);
1527
- if (cleanContent) {
1511
+ // Create list item for the parent text (if any)
1512
+ if (item.text && item.text.trim()) {
1513
+ const cleanText = NotionAITool.processNestedHtmlInListItem(item.text);
1514
+ if (cleanText) {
1528
1515
  blocks.push({
1529
1516
  type: listType,
1530
1517
  [listType]: {
1531
- rich_text: NotionAITool.parseBasicMarkdown(cleanContent),
1518
+ rich_text: NotionAITool.parseBasicMarkdown(cleanText),
1532
1519
  },
1533
1520
  });
1534
1521
  }
1535
1522
  }
1523
+ // Process each child branch
1524
+ for (const child of item.children) {
1525
+ const childListType = child.type === 'ul' ? 'bulleted_list_item' : 'numbered_list_item';
1526
+ NotionAITool.processNestedList(child.content, childListType, blocks);
1527
+ }
1536
1528
  }
1537
1529
  }
1538
1530
  catch (error) {
@@ -1546,58 +1538,216 @@ class NotionAITool {
1546
1538
  });
1547
1539
  }
1548
1540
  }
1549
- // Helper function to properly extract list items handling nested <li> tags
1550
- static extractListItems(content) {
1541
+ // Extract list items with proper branching structure - only process top-level <li> tags
1542
+ static extractListItemsWithBranching(content) {
1551
1543
  const items = [];
1552
- let currentPos = 0;
1553
- while (currentPos < content.length) {
1554
- // Find the next <li> opening tag
1555
- const liStart = content.indexOf('<li', currentPos);
1544
+ let pos = 0;
1545
+ while (pos < content.length) {
1546
+ // Find next <li> tag at the current level
1547
+ const liStart = content.indexOf('<li', pos);
1556
1548
  if (liStart === -1)
1557
1549
  break;
1558
- // Find the end of the opening tag
1559
- const openTagEnd = content.indexOf('>', liStart);
1560
- if (openTagEnd === -1)
1550
+ const liOpenEnd = content.indexOf('>', liStart);
1551
+ if (liOpenEnd === -1)
1561
1552
  break;
1562
- // Now find the matching closing </li> tag accounting for nesting
1563
- let depth = 1;
1564
- let pos = openTagEnd + 1;
1565
- let itemEnd = -1;
1566
- while (pos < content.length && depth > 0) {
1567
- const nextLiOpen = content.indexOf('<li', pos);
1568
- const nextLiClose = content.indexOf('</li>', pos);
1569
- // If no more closing tags, we're done
1553
+ // Find the matching </li> using proper depth tracking for nested tags
1554
+ let depth = 0;
1555
+ let searchPos = liOpenEnd + 1; // Start after the opening <li> tag
1556
+ let liEnd = -1;
1557
+ while (searchPos < content.length) {
1558
+ const nextLiOpen = content.indexOf('<li', searchPos);
1559
+ const nextLiClose = content.indexOf('</li>', searchPos);
1560
+ // Handle case where no more closing tags
1570
1561
  if (nextLiClose === -1)
1571
1562
  break;
1572
- // If there's an opening tag before the next closing tag, increase depth
1563
+ // If there's an opening tag before the next closing tag
1573
1564
  if (nextLiOpen !== -1 && nextLiOpen < nextLiClose) {
1574
1565
  depth++;
1575
- pos = nextLiOpen + 3; // Move past '<li'
1566
+ searchPos = nextLiOpen + 3; // Move past '<li'
1576
1567
  }
1577
1568
  else {
1578
1569
  // Found a closing tag
1579
- depth--;
1580
1570
  if (depth === 0) {
1581
- itemEnd = nextLiClose + 5; // Include the '</li>'
1571
+ // This is our matching closing tag
1572
+ liEnd = nextLiClose;
1573
+ break;
1574
+ }
1575
+ else {
1576
+ // This closing tag belongs to a nested li
1577
+ depth--;
1578
+ searchPos = nextLiClose + 5; // Move past '</li>'
1579
+ }
1580
+ }
1581
+ }
1582
+ if (liEnd === -1) {
1583
+ // No matching closing tag found
1584
+ pos = liOpenEnd + 1;
1585
+ continue;
1586
+ }
1587
+ // Extract the content between <li> and </li>
1588
+ const fullItemContent = content.substring(liOpenEnd + 1, liEnd);
1589
+ if (!fullItemContent.trim()) {
1590
+ pos = liEnd + 5;
1591
+ continue;
1592
+ }
1593
+ const item = { text: '', children: [] };
1594
+ // Process the content to separate text from nested lists
1595
+ let contentPos = 0;
1596
+ let textParts = [];
1597
+ while (contentPos < fullItemContent.length) {
1598
+ // Look for the next nested list (ul or ol)
1599
+ const nextUlStart = fullItemContent.indexOf('<ul', contentPos);
1600
+ const nextOlStart = fullItemContent.indexOf('<ol', contentPos);
1601
+ let nextListStart = -1;
1602
+ let listType = '';
1603
+ if (nextUlStart !== -1 && (nextOlStart === -1 || nextUlStart < nextOlStart)) {
1604
+ nextListStart = nextUlStart;
1605
+ listType = 'ul';
1606
+ }
1607
+ else if (nextOlStart !== -1) {
1608
+ nextListStart = nextOlStart;
1609
+ listType = 'ol';
1610
+ }
1611
+ if (nextListStart === -1) {
1612
+ // No more nested lists - add remaining text
1613
+ const remainingText = fullItemContent.substring(contentPos);
1614
+ if (remainingText.trim()) {
1615
+ textParts.push(remainingText);
1616
+ }
1617
+ break;
1618
+ }
1619
+ // Add text before the nested list
1620
+ const textBefore = fullItemContent.substring(contentPos, nextListStart);
1621
+ if (textBefore.trim()) {
1622
+ textParts.push(textBefore);
1623
+ }
1624
+ // Find the end of this nested list
1625
+ const listOpenEnd = fullItemContent.indexOf('>', nextListStart);
1626
+ if (listOpenEnd === -1) {
1627
+ // Malformed list tag
1628
+ textParts.push(fullItemContent.substring(contentPos));
1629
+ break;
1630
+ }
1631
+ // Track depth to find the matching closing tag
1632
+ let listDepth = 1;
1633
+ let listSearchPos = listOpenEnd + 1;
1634
+ let listEnd = -1;
1635
+ const openTag = `<${listType}`;
1636
+ const closeTag = `</${listType}>`;
1637
+ while (listSearchPos < fullItemContent.length && listDepth > 0) {
1638
+ const nextListOpen = fullItemContent.indexOf(openTag, listSearchPos);
1639
+ const nextListClose = fullItemContent.indexOf(closeTag, listSearchPos);
1640
+ if (nextListClose === -1)
1582
1641
  break;
1642
+ if (nextListOpen !== -1 && nextListOpen < nextListClose) {
1643
+ listDepth++;
1644
+ listSearchPos = nextListOpen + openTag.length;
1583
1645
  }
1584
1646
  else {
1585
- pos = nextLiClose + 5; // Move past '</li>'
1647
+ listDepth--;
1648
+ if (listDepth === 0) {
1649
+ listEnd = nextListClose + closeTag.length;
1650
+ break;
1651
+ }
1652
+ listSearchPos = nextListClose + closeTag.length;
1586
1653
  }
1587
1654
  }
1655
+ if (listEnd !== -1) {
1656
+ // Extract the content between <ul>/<ol> and </ul>/</ol>
1657
+ const listContent = fullItemContent.substring(listOpenEnd + 1, listEnd - closeTag.length);
1658
+ item.children.push({
1659
+ type: listType,
1660
+ content: listContent
1661
+ });
1662
+ contentPos = listEnd;
1663
+ }
1664
+ else {
1665
+ // Malformed nested list - treat remaining as text
1666
+ textParts.push(fullItemContent.substring(contentPos));
1667
+ break;
1668
+ }
1588
1669
  }
1589
- if (itemEnd !== -1) {
1590
- // Extract the content between <li...> and </li>
1591
- const fullMatch = content.substring(liStart, itemEnd);
1592
- const innerMatch = fullMatch.match(/<li[^>]*>([\s\S]*)<\/li>$/);
1593
- if (innerMatch) {
1594
- items.push(innerMatch[1]);
1670
+ // Combine all text parts and clean them
1671
+ if (textParts.length > 0) {
1672
+ const combinedText = textParts.join(' ').trim();
1673
+ const cleanText = NotionAITool.processNestedHtmlInListItem(combinedText);
1674
+ if (cleanText) {
1675
+ item.text = cleanText;
1595
1676
  }
1596
- currentPos = itemEnd;
1597
1677
  }
1598
- else {
1599
- // Malformed HTML, skip this tag
1600
- currentPos = openTagEnd + 1;
1678
+ // Only add items that have either text or children
1679
+ if (item.text.trim() || item.children.length > 0) {
1680
+ items.push(item);
1681
+ }
1682
+ pos = liEnd + 5; // Move past </li>
1683
+ }
1684
+ return items;
1685
+ }
1686
+ // Helper function to properly extract list items handling nested <li> tags
1687
+ static extractListItems(content) {
1688
+ const items = [];
1689
+ // Use a more robust regex approach that respects nesting
1690
+ // This regex captures the complete <li>...</li> blocks including nested content
1691
+ const liRegex = /<li[^>]*>((?:[^<]|<(?!\/li>))*?(?:<[uo]l[^>]*>[\s\S]*?<\/[uo]l>(?:[^<]|<(?!\/li>))*?)*?)<\/li>/gi;
1692
+ let match;
1693
+ while ((match = liRegex.exec(content)) !== null) {
1694
+ const itemContent = match[1];
1695
+ if (itemContent && itemContent.trim()) {
1696
+ items.push(itemContent.trim());
1697
+ }
1698
+ }
1699
+ // Fall back to the old depth-tracking method when the regex pass finds no items
1700
+ if (items.length === 0) {
1701
+ let currentPos = 0;
1702
+ while (currentPos < content.length) {
1703
+ // Find the next <li> opening tag
1704
+ const liStart = content.indexOf('<li', currentPos);
1705
+ if (liStart === -1)
1706
+ break;
1707
+ // Find the end of the opening tag
1708
+ const openTagEnd = content.indexOf('>', liStart);
1709
+ if (openTagEnd === -1)
1710
+ break;
1711
+ // Now find the matching closing </li> tag accounting for nesting
1712
+ let depth = 1;
1713
+ let pos = openTagEnd + 1;
1714
+ let itemEnd = -1;
1715
+ while (pos < content.length && depth > 0) {
1716
+ const nextLiOpen = content.indexOf('<li', pos);
1717
+ const nextLiClose = content.indexOf('</li>', pos);
1718
+ // If no more closing tags, we're done
1719
+ if (nextLiClose === -1)
1720
+ break;
1721
+ // If there's an opening tag before the next closing tag, increase depth
1722
+ if (nextLiOpen !== -1 && nextLiOpen < nextLiClose) {
1723
+ depth++;
1724
+ pos = nextLiOpen + 3; // Move past '<li'
1725
+ }
1726
+ else {
1727
+ // Found a closing tag
1728
+ depth--;
1729
+ if (depth === 0) {
1730
+ itemEnd = nextLiClose + 5; // Include the '</li>'
1731
+ break;
1732
+ }
1733
+ else {
1734
+ pos = nextLiClose + 5; // Move past '</li>'
1735
+ }
1736
+ }
1737
+ }
1738
+ if (itemEnd !== -1) {
1739
+ // Extract the content between <li...> and </li>
1740
+ const fullMatch = content.substring(liStart, itemEnd);
1741
+ const innerMatch = fullMatch.match(/<li[^>]*>([\s\S]*)<\/li>$/);
1742
+ if (innerMatch) {
1743
+ items.push(innerMatch[1]);
1744
+ }
1745
+ currentPos = itemEnd;
1746
+ }
1747
+ else {
1748
+ // Malformed HTML, skip this tag
1749
+ currentPos = openTagEnd + 1;
1750
+ }
1601
1751
  }
1602
1752
  }
1603
1753
  return items;
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "n8n-nodes-notion-advanced",
3
- "version": "1.2.28-beta",
3
+ "version": "1.2.29-beta",
4
4
  "description": "Advanced n8n Notion nodes: Full-featured workflow node + AI Agent Tool for intelligent Notion automation with 25+ block types (BETA)",
5
5
  "scripts": {},
6
6
  "files": [