n8n-nodes-notion-advanced 1.2.7-beta → 1.2.9-beta

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -2,6 +2,7 @@
2
2
  Object.defineProperty(exports, "__esModule", { value: true });
3
3
  exports.NotionAITool = void 0;
4
4
  const n8n_workflow_1 = require("n8n-workflow");
5
+ const crypto_1 = require("crypto");
5
6
  const NotionUtils_1 = require("./NotionUtils");
6
7
  class NotionAITool {
7
8
  constructor() {
@@ -455,8 +456,8 @@ class NotionAITool {
455
456
  for (let i = 0; i < lines.length; i++) {
456
457
  const line = lines[i];
457
458
  const trimmedLine = line.trim();
458
- // Skip completely empty lines and XML placeholders
459
- if (!trimmedLine || trimmedLine.startsWith('__XML_BLOCK_'))
459
+ // Skip completely empty lines and XML placeholders (now using dynamic prefix check)
460
+ if (!trimmedLine || /__XML_[a-f0-9]{8}_\d+__/.test(trimmedLine))
460
461
  continue;
461
462
  // Traditional markdown patterns (for backwards compatibility)
462
463
  if (trimmedLine.startsWith('# ')) {
@@ -641,328 +642,557 @@ class NotionAITool {
641
642
  }
642
643
  return blocks;
643
644
  }
644
- // New XML-like tag processing function
645
+ // Helper function to resolve overlapping tag matches
646
+ static resolveOverlaps(matches) {
647
+ const resolved = [];
648
+ const sorted = matches.sort((a, b) => {
649
+ if (a.start !== b.start)
650
+ return a.start - b.start;
651
+ return (b.end - b.start) - (a.end - a.start); // Prefer longer matches
652
+ });
653
+ for (const match of sorted) {
654
+ const hasOverlap = resolved.some(existing => (match.start < existing.end && match.end > existing.start));
655
+ if (!hasOverlap) {
656
+ resolved.push(match);
657
+ }
658
+ }
659
+ return resolved;
660
+ }
661
+ // Helper function to validate XML tag structure
662
+ static validateXmlTag(match, tagName) {
663
+ try {
664
+ // Basic validation for well-formed tags
665
+ const openTag = new RegExp(`<${tagName}[^>]*>`, 'i');
666
+ const closeTag = new RegExp(`</${tagName}>`, 'i');
667
+ if (!openTag.test(match) || !closeTag.test(match)) {
668
+ console.warn(`Malformed XML tag detected: ${match.substring(0, 50)}...`);
669
+ return false;
670
+ }
671
+ return true;
672
+ }
673
+ catch (error) {
674
+ console.warn(`Error validating XML tag: ${error}`);
675
+ return false;
676
+ }
677
+ }
678
+ // Helper function for optimized string replacement
679
+ static optimizedReplace(content, matches) {
680
+ if (matches.length === 0)
681
+ return content;
682
+ const parts = [];
683
+ let lastIndex = 0;
684
+ matches.forEach(({ start, end, replacement }) => {
685
+ parts.push(content.substring(lastIndex, start));
686
+ parts.push(replacement);
687
+ lastIndex = end;
688
+ });
689
+ parts.push(content.substring(lastIndex));
690
+ return parts.join('');
691
+ }
692
+ // Helper function for Unicode-safe position calculation
693
+ static getUtf8BytePosition(str, charIndex) {
694
+ try {
695
+ return Buffer.from(str.substring(0, charIndex), 'utf8').length;
696
+ }
697
+ catch (error) {
698
+ // Fallback to character index if Buffer operations fail
699
+ return charIndex;
700
+ }
701
+ }
702
+ // Build hierarchical XML tree structure
703
+ static buildXMLTree(content, tagProcessors) {
704
+ const allMatches = [];
705
+ // Collect all XML tags with their positions
706
+ tagProcessors.forEach(({ regex, blockCreator, listProcessor }) => {
707
+ var _a;
708
+ const globalRegex = new RegExp(regex.source, 'gis');
709
+ let match;
710
+ while ((match = globalRegex.exec(content)) !== null) {
711
+ const tagName = ((_a = match[0].match(/<(\w+)/)) === null || _a === void 0 ? void 0 : _a[1]) || 'unknown';
712
+ allMatches.push({
713
+ id: `${tagName}_${match.index}_${Date.now()}_${Math.random()}`,
714
+ tagName,
715
+ start: match.index,
716
+ end: match.index + match[0].length,
717
+ match: match[0],
718
+ processor: blockCreator,
719
+ groups: match.slice(1),
720
+ children: [],
721
+ depth: 0,
722
+ innerContent: match[0],
723
+ replacement: undefined,
724
+ listProcessor
725
+ });
726
+ }
727
+ });
728
+ // Sort by start position
729
+ allMatches.sort((a, b) => a.start - b.start);
730
+ // Build parent-child relationships
731
+ const rootNodes = [];
732
+ const nodeStack = [];
733
+ for (const node of allMatches) {
734
+ // Pop nodes from stack that don't contain this node
735
+ while (nodeStack.length > 0 && nodeStack[nodeStack.length - 1].end <= node.start) {
736
+ nodeStack.pop();
737
+ }
738
+ // Set depth based on stack size
739
+ node.depth = nodeStack.length;
740
+ // If there's a parent on the stack, add this as its child
741
+ if (nodeStack.length > 0) {
742
+ const parent = nodeStack[nodeStack.length - 1];
743
+ node.parent = parent;
744
+ parent.children.push(node);
745
+ }
746
+ else {
747
+ // This is a root node
748
+ rootNodes.push(node);
749
+ }
750
+ // Only push self-contained tags to stack (not self-closing)
751
+ if (!node.match.endsWith('/>') && node.match.includes('</')) {
752
+ nodeStack.push(node);
753
+ }
754
+ }
755
+ return rootNodes;
756
+ }
757
+ // Process XML tree depth-first (children before parents)
758
+ static processXMLTreeDepthFirst(nodes, blocks, placeholderPrefix) {
759
+ const replacements = new Map();
760
+ let blockCounter = 0;
761
+ const processNode = (node) => {
762
+ // First, process all children depth-first
763
+ for (const child of node.children) {
764
+ const childReplacement = processNode(child);
765
+ replacements.set(child.id, childReplacement);
766
+ }
767
+ // Extract inner content (content between opening and closing tags)
768
+ let innerContent = node.innerContent;
769
+ // Extract content between opening and closing tags
770
+ const openTagMatch = node.match.match(/^<[^>]+>/);
771
+ const closeTagMatch = node.match.match(/<\/[^>]+>$/);
772
+ if (openTagMatch && closeTagMatch) {
773
+ const openTag = openTagMatch[0];
774
+ const closeTag = closeTagMatch[0];
775
+ const startIndex = node.match.indexOf(openTag) + openTag.length;
776
+ const endIndex = node.match.lastIndexOf(closeTag);
777
+ innerContent = node.match.substring(startIndex, endIndex);
778
+ // Replace child nodes in inner content with their processed content
779
+ for (const child of node.children) {
780
+ const childReplacement = replacements.get(child.id) || '';
781
+ innerContent = innerContent.replace(child.match, childReplacement);
782
+ }
783
+ }
784
+ // Process this node with updated inner content
785
+ try {
786
+ // Handle special list processors
787
+ if (node.listProcessor && (node.tagName === 'ul' || node.tagName === 'ol')) {
788
+ node.listProcessor(innerContent, blocks);
789
+ return `${placeholderPrefix}${blockCounter++}__`;
790
+ }
791
+ // Use blockCreator to create the block
792
+ const block = node.processor(...node.groups);
793
+ if (block) {
794
+ blocks.push(block);
795
+ }
796
+ return `${placeholderPrefix}${blockCounter++}__`;
797
+ }
798
+ catch (error) {
799
+ console.warn(`Error processing XML node ${node.tagName}:`, error);
800
+ return node.match; // Return original if processing fails
801
+ }
802
+ };
803
+ // Process all root nodes
804
+ for (const rootNode of nodes) {
805
+ const replacement = processNode(rootNode);
806
+ replacements.set(rootNode.id, replacement);
807
+ }
808
+ return replacements;
809
+ }
810
+ // Apply hierarchical replacements to content
811
+ static applyHierarchicalReplacements(content, nodes, replacements) {
812
+ let processedContent = content;
813
+ // Sort nodes by start position in reverse order to avoid position shifts
814
+ const allNodes = this.getAllNodesFromTree(nodes);
815
+ allNodes.sort((a, b) => b.start - a.start);
816
+ // Apply replacements from end to beginning
817
+ for (const node of allNodes) {
818
+ const replacement = replacements.get(node.id);
819
+ if (replacement !== undefined) {
820
+ processedContent = processedContent.substring(0, node.start) +
821
+ replacement +
822
+ processedContent.substring(node.end);
823
+ }
824
+ }
825
+ return processedContent;
826
+ }
827
+ // Helper function to get all nodes from tree (flattened)
828
+ static getAllNodesFromTree(nodes) {
829
+ const allNodes = [];
830
+ const collectNodes = (nodeList) => {
831
+ for (const node of nodeList) {
832
+ allNodes.push(node);
833
+ collectNodes(node.children);
834
+ }
835
+ };
836
+ collectNodes(nodes);
837
+ return allNodes;
838
+ }
839
+ // New hierarchical XML-like tag processing function
645
840
  static processXmlTags(content, blocks) {
646
841
  let processedContent = content;
647
- let blockCounter = 0;
648
- // Process XML-like tags in order of priority
842
+ // Generate unique placeholder prefix to avoid collisions
843
+ const placeholderPrefix = `__XML_${(0, crypto_1.randomUUID)().slice(0, 8)}_`;
844
+ // Debug mode for development
845
+ const DEBUG_ORDERING = process.env.NODE_ENV === 'development';
846
+ // Define all tag processors
649
847
  const tagProcessors = [
650
848
  // Callouts: <callout type="info">content</callout>
651
849
  {
652
850
  regex: /<callout\s*(?:type="([^"]*)")?\s*>(.*?)<\/callout>/gis,
653
- processor: (match, type = 'info', content) => {
851
+ blockCreator: (type = 'info', content) => {
654
852
  const emoji = NotionAITool.getCalloutEmoji(type.toLowerCase());
655
853
  const color = NotionAITool.getCalloutColor(type.toLowerCase());
656
- blocks.push({
854
+ return {
657
855
  type: 'callout',
658
856
  callout: {
659
857
  rich_text: NotionAITool.parseBasicMarkdown(content.trim()),
660
858
  icon: { type: 'emoji', emoji },
661
859
  color: color,
662
860
  },
663
- });
664
- return `__XML_BLOCK_${blockCounter++}__`;
861
+ };
665
862
  }
666
863
  },
667
864
  // Code blocks: <code language="javascript">content</code>
668
865
  {
669
866
  regex: /<code\s*(?:language="([^"]*)")?\s*>(.*?)<\/code>/gis,
670
- processor: (match, language = 'plain_text', content) => {
671
- blocks.push({
867
+ blockCreator: (language = 'plain_text', content) => {
868
+ return {
672
869
  type: 'code',
673
870
  code: {
674
871
  rich_text: [(0, NotionUtils_1.createRichText)(content.trim())],
675
872
  language: language === 'plain text' ? 'plain_text' : language,
676
873
  },
677
- });
678
- return `__XML_BLOCK_${blockCounter++}__`;
874
+ };
679
875
  }
680
876
  },
681
877
  // Images: <image src="url" alt="description">caption</image>
682
878
  {
683
879
  regex: /<image\s+src="([^"]*)"(?:\s+alt="([^"]*)")?\s*>(.*?)<\/image>/gis,
684
- processor: (match, src, alt = '', caption = '') => {
880
+ blockCreator: (src, alt = '', caption = '') => {
685
881
  const captionText = caption.trim() || alt;
686
- blocks.push({
882
+ return {
687
883
  type: 'image',
688
884
  image: {
689
885
  type: 'external',
690
886
  external: { url: src },
691
887
  caption: captionText ? NotionAITool.parseBasicMarkdown(captionText) : [],
692
888
  },
693
- });
694
- return `__XML_BLOCK_${blockCounter++}__`;
889
+ };
695
890
  }
696
891
  },
697
892
  // Self-closing images: <image src="url" alt="description"/>
698
893
  {
699
894
  regex: /<image\s+src="([^"]*)"(?:\s+alt="([^"]*)")?\s*\/>/gis,
700
- processor: (match, src, alt = '') => {
701
- blocks.push({
895
+ blockCreator: (src, alt = '') => {
896
+ return {
702
897
  type: 'image',
703
898
  image: {
704
899
  type: 'external',
705
900
  external: { url: src },
706
901
  caption: alt ? NotionAITool.parseBasicMarkdown(alt) : [],
707
902
  },
708
- });
709
- return `__XML_BLOCK_${blockCounter++}__`;
903
+ };
710
904
  }
711
905
  },
712
906
  // Equations: <equation>E=mc^2</equation>
713
907
  {
714
908
  regex: /<equation>(.*?)<\/equation>/gis,
715
- processor: (match, expression) => {
716
- blocks.push({
909
+ blockCreator: (expression) => {
910
+ return {
717
911
  type: 'equation',
718
912
  equation: {
719
913
  expression: expression.trim(),
720
914
  },
721
- });
722
- return `__XML_BLOCK_${blockCounter++}__`;
915
+ };
723
916
  }
724
917
  },
725
918
  // Embeds: <embed>url</embed>
726
919
  {
727
920
  regex: /<embed>(.*?)<\/embed>/gis,
728
- processor: (match, url) => {
729
- blocks.push({
921
+ blockCreator: (url) => {
922
+ return {
730
923
  type: 'embed',
731
924
  embed: {
732
925
  url: url.trim(),
733
926
  },
734
- });
735
- return `__XML_BLOCK_${blockCounter++}__`;
927
+ };
736
928
  }
737
929
  },
738
930
  // Bookmarks: <bookmark>url</bookmark>
739
931
  {
740
932
  regex: /<bookmark>(.*?)<\/bookmark>/gis,
741
- processor: (match, url) => {
742
- blocks.push({
933
+ blockCreator: (url) => {
934
+ return {
743
935
  type: 'bookmark',
744
936
  bookmark: {
745
937
  url: url.trim(),
746
938
  },
747
- });
748
- return `__XML_BLOCK_${blockCounter++}__`;
939
+ };
749
940
  }
750
941
  },
751
942
  // Toggles: <toggle>title</toggle>
752
943
  {
753
944
  regex: /<toggle>(.*?)<\/toggle>/gis,
754
- processor: (match, title) => {
755
- blocks.push({
945
+ blockCreator: (title) => {
946
+ return {
756
947
  type: 'toggle',
757
948
  toggle: {
758
949
  rich_text: NotionAITool.parseBasicMarkdown(title.trim()),
759
950
  children: [],
760
951
  },
761
- });
762
- return `__XML_BLOCK_${blockCounter++}__`;
952
+ };
763
953
  }
764
954
  },
765
955
  // Quotes: <quote>content</quote>
766
956
  {
767
957
  regex: /<quote>(.*?)<\/quote>/gis,
768
- processor: (match, content) => {
769
- blocks.push({
958
+ blockCreator: (content) => {
959
+ return {
770
960
  type: 'quote',
771
961
  quote: {
772
962
  rich_text: NotionAITool.parseBasicMarkdown(content.trim()),
773
963
  },
774
- });
775
- return `__XML_BLOCK_${blockCounter++}__`;
964
+ };
776
965
  }
777
966
  },
778
967
  // Dividers: <divider/> or <divider></divider>
779
968
  {
780
969
  regex: /<divider\s*\/?>/gis,
781
- processor: (match) => {
782
- blocks.push({
970
+ blockCreator: () => {
971
+ return {
783
972
  type: 'divider',
784
973
  divider: {},
785
- });
786
- return `__XML_BLOCK_${blockCounter++}__`;
974
+ };
787
975
  }
788
976
  },
789
977
  // To-do items: <todo checked="true">content</todo>
790
978
  {
791
979
  regex: /<todo\s*(?:checked="([^"]*)")?\s*>(.*?)<\/todo>/gis,
792
- processor: (match, checked = 'false', content) => {
980
+ blockCreator: (checked = 'false', content) => {
793
981
  const isChecked = checked.toLowerCase() === 'true';
794
- blocks.push({
982
+ return {
795
983
  type: 'to_do',
796
984
  to_do: {
797
985
  rich_text: NotionAITool.parseBasicMarkdown(content.trim()),
798
986
  checked: isChecked,
799
987
  },
800
- });
801
- return `__XML_BLOCK_${blockCounter++}__`;
988
+ };
802
989
  }
803
990
  },
804
991
  // Headings: <h1>content</h1>, <h2>content</h2>, <h3>content</h3>
805
992
  {
806
993
  regex: /<h([123])>(.*?)<\/h[123]>/gis,
807
- processor: (match, level, content) => {
994
+ blockCreator: (level, content) => {
808
995
  const headingType = `heading_${level}`;
809
- blocks.push({
996
+ return {
810
997
  type: headingType,
811
998
  [headingType]: {
812
999
  rich_text: [(0, NotionUtils_1.createRichText)(content.trim())],
813
1000
  },
814
- });
815
- return `__XML_BLOCK_${blockCounter++}__`;
1001
+ };
816
1002
  }
817
1003
  },
818
1004
  // Paragraphs: <p>content</p>
819
1005
  {
820
1006
  regex: /<p>(.*?)<\/p>/gis,
821
- processor: (match, content) => {
822
- blocks.push({
1007
+ blockCreator: (content) => {
1008
+ return {
823
1009
  type: 'paragraph',
824
1010
  paragraph: {
825
1011
  rich_text: NotionAITool.parseBasicMarkdown(content.trim()),
826
1012
  },
827
- });
828
- return `__XML_BLOCK_${blockCounter++}__`;
1013
+ };
829
1014
  }
830
1015
  },
831
1016
  // Process complete bulleted lists first: <ul><li>item</li></ul>
832
1017
  {
833
1018
  regex: /<ul\s*[^>]*>(.*?)<\/ul>/gis,
834
- processor: (match, listContent) => {
835
- // Extract individual list items and process them
836
- const items = listContent.match(/<li\s*[^>]*>(.*?)<\/li>/gis) || [];
837
- items.forEach(item => {
838
- const itemContent = item.replace(/<\/?li[^>]*>/gi, '').trim();
839
- if (itemContent) {
840
- blocks.push({
841
- type: 'bulleted_list_item',
842
- bulleted_list_item: {
843
- rich_text: NotionAITool.parseBasicMarkdown(itemContent),
844
- },
845
- });
846
- }
847
- });
848
- return `__XML_BLOCK_${blockCounter++}__`;
1019
+ blockCreator: (listContent) => {
1020
+ // This will be handled specially in hierarchical processing
1021
+ return null;
1022
+ },
1023
+ listProcessor: (listContent, blocks) => {
1024
+ NotionAITool.processNestedList(listContent, 'bulleted_list_item', blocks);
849
1025
  }
850
1026
  },
851
1027
  // Process complete numbered lists first: <ol><li>item</li></ol>
852
1028
  {
853
1029
  regex: /<ol\s*[^>]*>(.*?)<\/ol>/gis,
854
- processor: (match, listContent) => {
855
- // Extract individual list items and process them
856
- const items = listContent.match(/<li\s*[^>]*>(.*?)<\/li>/gis) || [];
857
- items.forEach(item => {
858
- const itemContent = item.replace(/<\/?li[^>]*>/gi, '').trim();
859
- if (itemContent) {
860
- blocks.push({
861
- type: 'numbered_list_item',
862
- numbered_list_item: {
863
- rich_text: NotionAITool.parseBasicMarkdown(itemContent),
864
- },
865
- });
866
- }
867
- });
868
- return `__XML_BLOCK_${blockCounter++}__`;
1030
+ blockCreator: (listContent) => {
1031
+ // This will be handled specially in hierarchical processing
1032
+ return null;
1033
+ },
1034
+ listProcessor: (listContent, blocks) => {
1035
+ NotionAITool.processNestedList(listContent, 'numbered_list_item', blocks);
869
1036
  }
870
1037
  },
871
1038
  // Blockquotes: <blockquote>content</blockquote>
872
1039
  {
873
1040
  regex: /<blockquote>(.*?)<\/blockquote>/gis,
874
- processor: (match, content) => {
875
- blocks.push({
1041
+ blockCreator: (content) => {
1042
+ return {
876
1043
  type: 'quote',
877
1044
  quote: {
878
1045
  rich_text: NotionAITool.parseBasicMarkdown(content.trim()),
879
1046
  },
880
- });
881
- return `__XML_BLOCK_${blockCounter++}__`;
1047
+ };
882
1048
  }
883
1049
  },
884
1050
  // Preformatted text: <pre>content</pre>
885
1051
  {
886
1052
  regex: /<pre>(.*?)<\/pre>/gis,
887
- processor: (match, content) => {
888
- blocks.push({
1053
+ blockCreator: (content) => {
1054
+ return {
889
1055
  type: 'code',
890
1056
  code: {
891
1057
  rich_text: [(0, NotionUtils_1.createRichText)(content.trim())],
892
1058
  language: 'plain_text',
893
1059
  },
894
- });
895
- return `__XML_BLOCK_${blockCounter++}__`;
1060
+ };
896
1061
  }
897
1062
  },
898
1063
  // Standalone list items (only if not already processed in lists): <li>content</li>
899
1064
  {
900
1065
  regex: /<li\s*[^>]*>(.*?)<\/li>/gis,
901
- processor: (match, content) => {
1066
+ blockCreator: (content) => {
902
1067
  if (content.trim()) {
903
- blocks.push({
1068
+ return {
904
1069
  type: 'bulleted_list_item',
905
1070
  bulleted_list_item: {
906
1071
  rich_text: NotionAITool.parseBasicMarkdown(content.trim()),
907
1072
  },
908
- });
1073
+ };
909
1074
  }
910
- return `__XML_BLOCK_${blockCounter++}__`;
1075
+ return null;
911
1076
  }
912
1077
  },
913
1078
  // Strong/Bold: <strong>content</strong> or <b>content</b> (only as standalone)
914
1079
  {
915
1080
  regex: /(?:^|>|\s)<(strong|b)>(.*?)<\/(strong|b)>(?=<|$|\s)/gis,
916
- processor: (match, tag, content) => {
917
- blocks.push({
1081
+ blockCreator: (tag, content) => {
1082
+ return {
918
1083
  type: 'paragraph',
919
1084
  paragraph: {
920
1085
  rich_text: NotionAITool.parseBasicMarkdown(`**${content.trim()}**`),
921
1086
  },
922
- });
923
- return `__XML_BLOCK_${blockCounter++}__`;
1087
+ };
924
1088
  }
925
1089
  },
926
1090
  // Emphasis/Italic: <em>content</em> or <i>content</i> (only as standalone)
927
1091
  {
928
1092
  regex: /(?:^|>|\s)<(em|i)>(.*?)<\/(em|i)>(?=<|$|\s)/gis,
929
- processor: (match, tag, content) => {
930
- blocks.push({
1093
+ blockCreator: (tag, content) => {
1094
+ return {
931
1095
  type: 'paragraph',
932
1096
  paragraph: {
933
1097
  rich_text: NotionAITool.parseBasicMarkdown(`*${content.trim()}*`),
934
1098
  },
935
- });
936
- return `__XML_BLOCK_${blockCounter++}__`;
1099
+ };
937
1100
  }
938
1101
  },
939
1102
  // Line breaks: <br/> or <br>
940
1103
  {
941
1104
  regex: /<br\s*\/?>/gis,
942
- processor: (match) => {
943
- blocks.push({
1105
+ blockCreator: () => {
1106
+ return {
944
1107
  type: 'paragraph',
945
1108
  paragraph: {
946
1109
  rich_text: [(0, NotionUtils_1.createRichText)('')],
947
1110
  },
948
- });
949
- return `__XML_BLOCK_${blockCounter++}__`;
1111
+ };
950
1112
  }
951
1113
  },
952
1114
  ];
953
- // Process each tag type
954
- tagProcessors.forEach(({ regex, processor }) => {
955
- processedContent = processedContent.replace(regex, (match, group1, group2, group3) => {
956
- return processor(match, group1 || '', group2 || '', group3 || '');
1115
+ try {
1116
+ // Step 1: Build hierarchical XML tree
1117
+ const xmlTree = NotionAITool.buildXMLTree(processedContent, tagProcessors);
1118
+ if (DEBUG_ORDERING && xmlTree.length > 0) {
1119
+ console.log('XML Tree Structure:', xmlTree.map(node => ({
1120
+ tag: node.tagName,
1121
+ depth: node.depth,
1122
+ children: node.children.length,
1123
+ start: node.start
1124
+ })));
1125
+ }
1126
+ // Step 2: Process tree depth-first (children before parents)
1127
+ const replacements = NotionAITool.processXMLTreeDepthFirst(xmlTree, blocks, placeholderPrefix);
1128
+ // Step 3: Apply hierarchical replacements to content
1129
+ processedContent = NotionAITool.applyHierarchicalReplacements(processedContent, xmlTree, replacements);
1130
+ // Step 4: Clean up any remaining HTML tags
1131
+ processedContent = NotionAITool.cleanupRemainingHtml(processedContent, placeholderPrefix);
1132
+ if (DEBUG_ORDERING) {
1133
+ console.log(`Processed ${xmlTree.length} root XML nodes hierarchically, created ${blocks.length} blocks`);
1134
+ }
1135
+ }
1136
+ catch (error) {
1137
+ console.warn('Error in hierarchical XML processing, falling back to linear processing:', error);
1138
+ // Fallback to linear processing if hierarchical fails
1139
+ const allMatches = [];
1140
+ tagProcessors.forEach(({ regex, blockCreator }) => {
1141
+ const globalRegex = new RegExp(regex.source, 'gis');
1142
+ let match;
1143
+ while ((match = globalRegex.exec(processedContent)) !== null) {
1144
+ allMatches.push({
1145
+ start: match.index,
1146
+ end: match.index + match[0].length,
1147
+ match: match[0],
1148
+ processor: (match, group1, group2, group3) => {
1149
+ try {
1150
+ const block = blockCreator(group1 || '', group2 || '', group3 || '');
1151
+ if (block) {
1152
+ blocks.push(block);
1153
+ }
1154
+ return `${placeholderPrefix}${Math.random()}__`;
1155
+ }
1156
+ catch (error) {
1157
+ console.warn('Error in fallback processor:', error);
1158
+ return match;
1159
+ }
1160
+ },
1161
+ groups: match.slice(1)
1162
+ });
1163
+ }
957
1164
  });
958
- });
959
- // Clean up any remaining HTML tags that weren't processed
960
- processedContent = NotionAITool.cleanupRemainingHtml(processedContent);
1165
+ const resolvedMatches = NotionAITool.resolveOverlaps(allMatches);
1166
+ resolvedMatches.sort((a, b) => a.start - b.start);
1167
+ const processedMatches = resolvedMatches.map(({ start, end, match, processor, groups }) => {
1168
+ try {
1169
+ const replacement = processor(match, groups[0] || '', groups[1] || '', groups[2] || '');
1170
+ return { start, end, replacement, match };
1171
+ }
1172
+ catch (error) {
1173
+ return { start, end, replacement: match, match };
1174
+ }
1175
+ });
1176
+ if (processedMatches.length > 0) {
1177
+ processedContent = NotionAITool.optimizedReplace(processedContent, processedMatches);
1178
+ }
1179
+ processedContent = NotionAITool.cleanupRemainingHtml(processedContent, placeholderPrefix);
1180
+ }
961
1181
  return processedContent;
962
1182
  }
963
- // Cleanup function to remove remaining HTML tags
964
- static cleanupRemainingHtml(content) {
1183
+ // Cleanup function to remove remaining HTML tags and XML_BLOCK artifacts
1184
+ static cleanupRemainingHtml(content, placeholderPrefix) {
965
1185
  let cleaned = content;
1186
+ // Remove XML_BLOCK placeholder artifacts (support both old and new format)
1187
+ if (placeholderPrefix) {
1188
+ const placeholderRegex = new RegExp(`${placeholderPrefix.replace(/[.*+?^${}()|[\]\\]/g, '\\$&')}\\d+__`, 'g');
1189
+ cleaned = cleaned.replace(placeholderRegex, '');
1190
+ }
1191
+ else {
1192
+ // Fallback for backward compatibility
1193
+ cleaned = cleaned.replace(/__XML_BLOCK_\d+__/g, '');
1194
+ cleaned = cleaned.replace(/__XML_[a-f0-9]{8}_\d+__/g, '');
1195
+ }
966
1196
  // Remove common HTML tags that might be left behind
967
1197
  const htmlTagsToRemove = [
968
1198
  /<\/?ul\s*[^>]*>/gi,
@@ -974,6 +1204,13 @@ class NotionAITool {
974
1204
  /<\/?i\s*[^>]*>/gi,
975
1205
  /<\/?div\s*[^>]*>/gi,
976
1206
  /<\/?span\s*[^>]*>/gi,
1207
+ /<\/?p\s*[^>]*>/gi,
1208
+ /<\/?a\s*[^>]*>/gi,
1209
+ /<\/?code\s*[^>]*>/gi,
1210
+ /<\/?u\s*[^>]*>/gi,
1211
+ /<\/?s\s*[^>]*>/gi,
1212
+ /<\/?del\s*[^>]*>/gi,
1213
+ /<\/?mark\s*[^>]*>/gi,
977
1214
  /<br\s*\/?>/gi,
978
1215
  ];
979
1216
  htmlTagsToRemove.forEach(regex => {
@@ -983,8 +1220,131 @@ class NotionAITool {
983
1220
  cleaned = cleaned.replace(/^\s*[\r\n]/gm, '');
984
1221
  // Remove multiple consecutive line breaks
985
1222
  cleaned = cleaned.replace(/\n{3,}/g, '\n\n');
1223
+ // Remove lines that contain only XML_BLOCK artifacts
1224
+ cleaned = cleaned.replace(/^.*__XML_BLOCK_\d+__.*$/gm, '');
1225
+ cleaned = cleaned.replace(/^.*__XML_[a-f0-9]{8}_\d+__.*$/gm, '');
986
1226
  return cleaned.trim();
987
1227
  }
1228
+ // Helper function to process nested HTML elements in list items
1229
+ static processNestedHtmlInListItem(content) {
1230
+ let processed = content;
1231
+ // First, remove wrapping <p> tags (common in nested content)
1232
+ processed = processed.replace(/^<p\s*[^>]*>(.*?)<\/p>$/gis, '$1');
1233
+ // Convert HTML formatting tags to markdown equivalents
1234
+ const htmlToMarkdown = [
1235
+ { regex: /<strong\s*[^>]*>(.*?)<\/strong>/gis, replacement: '**$1**' },
1236
+ { regex: /<b\s*[^>]*>(.*?)<\/b>/gis, replacement: '**$1**' },
1237
+ { regex: /<em\s*[^>]*>(.*?)<\/em>/gis, replacement: '*$1*' },
1238
+ { regex: /<i\s*[^>]*>(.*?)<\/i>/gis, replacement: '*$1*' },
1239
+ { regex: /<code\s*[^>]*>(.*?)<\/code>/gis, replacement: '`$1`' },
1240
+ { regex: /<a\s+href="([^"]*)"[^>]*>(.*?)<\/a>/gis, replacement: '[$2]($1)' },
1241
+ { regex: /<u\s*[^>]*>(.*?)<\/u>/gis, replacement: '$1' }, // Notion doesn't support underline
1242
+ { regex: /<s\s*[^>]*>(.*?)<\/s>/gis, replacement: '~~$1~~' },
1243
+ { regex: /<del\s*[^>]*>(.*?)<\/del>/gis, replacement: '~~$1~~' },
1244
+ { regex: /<mark\s*[^>]*>(.*?)<\/mark>/gis, replacement: '$1' }, // Notion doesn't support highlight in rich text
1245
+ ];
1246
+ // Apply HTML to markdown conversions
1247
+ htmlToMarkdown.forEach(({ regex, replacement }) => {
1248
+ processed = processed.replace(regex, replacement);
1249
+ });
1250
+ // Remove any remaining HTML tags that we don't handle
1251
+ const tagsToRemove = [
1252
+ /<\/?div\s*[^>]*>/gi,
1253
+ /<\/?span\s*[^>]*>/gi,
1254
+ /<\/?p\s*[^>]*>/gi,
1255
+ /<br\s*\/?>/gi,
1256
+ ];
1257
+ tagsToRemove.forEach(regex => {
1258
+ processed = processed.replace(regex, ' ');
1259
+ });
1260
+ // Clean up extra whitespace
1261
+ processed = processed.replace(/\s+/g, ' ').trim();
1262
+ return processed;
1263
+ }
1264
+ // Helper function to process nested lists and flatten them for Notion
1265
+ static processNestedList(listContent, listType, blocks) {
1266
+ // Extract top-level list items using a more careful approach
1267
+ const items = [];
1268
+ let currentPos = 0;
1269
+ while (currentPos < listContent.length) {
1270
+ const liStart = listContent.indexOf('<li', currentPos);
1271
+ if (liStart === -1)
1272
+ break;
1273
+ const liEndTag = listContent.indexOf('>', liStart);
1274
+ if (liEndTag === -1)
1275
+ break;
1276
+ // Find the matching closing </li> tag, accounting for nested content
1277
+ let depth = 1;
1278
+ let searchPos = liEndTag + 1;
1279
+ let liEnd = -1;
1280
+ while (searchPos < listContent.length && depth > 0) {
1281
+ const nextLiStart = listContent.indexOf('<li', searchPos);
1282
+ const nextLiEnd = listContent.indexOf('</li>', searchPos);
1283
+ if (nextLiEnd === -1)
1284
+ break;
1285
+ if (nextLiStart !== -1 && nextLiStart < nextLiEnd) {
1286
+ depth++;
1287
+ searchPos = nextLiStart + 3;
1288
+ }
1289
+ else {
1290
+ depth--;
1291
+ if (depth === 0) {
1292
+ liEnd = nextLiEnd;
1293
+ }
1294
+ searchPos = nextLiEnd + 5;
1295
+ }
1296
+ }
1297
+ if (liEnd === -1)
1298
+ break;
1299
+ // Extract the full <li>...</li> content
1300
+ const fullItem = listContent.substring(liStart, liEnd + 5);
1301
+ items.push(fullItem);
1302
+ currentPos = liEnd + 5;
1303
+ }
1304
+ // Process each top-level item
1305
+ items.forEach(item => {
1306
+ // Remove the outer <li> tags
1307
+ let itemContent = item.replace(/^<li[^>]*>/, '').replace(/<\/li>$/, '').trim();
1308
+ // Check if this item contains nested lists
1309
+ const hasNestedList = /<[uo]l\s*[^>]*>/i.test(itemContent);
1310
+ if (hasNestedList) {
1311
+ // Extract the text before the nested list
1312
+ const beforeNestedList = itemContent.replace(/<[uo]l\s*[^>]*>.*$/is, '').trim();
1313
+ if (beforeNestedList) {
1314
+ // Clean up and add the main item
1315
+ const cleanContent = NotionAITool.processNestedHtmlInListItem(beforeNestedList);
1316
+ if (cleanContent) {
1317
+ blocks.push({
1318
+ type: listType,
1319
+ [listType]: {
1320
+ rich_text: NotionAITool.parseBasicMarkdown(cleanContent),
1321
+ },
1322
+ });
1323
+ }
1324
+ }
1325
+ // Extract and process nested lists
1326
+ const nestedListMatch = itemContent.match(/<([uo]l)\s*[^>]*>(.*?)<\/\1>/is);
1327
+ if (nestedListMatch) {
1328
+ const [, nestedListTag, nestedContent] = nestedListMatch;
1329
+ const nestedListType = nestedListTag === 'ul' ? 'bulleted_list_item' : 'numbered_list_item';
1330
+ // Recursively process nested list
1331
+ NotionAITool.processNestedList(nestedContent, nestedListType, blocks);
1332
+ }
1333
+ }
1334
+ else {
1335
+ // Simple item without nested lists
1336
+ const cleanContent = NotionAITool.processNestedHtmlInListItem(itemContent);
1337
+ if (cleanContent) {
1338
+ blocks.push({
1339
+ type: listType,
1340
+ [listType]: {
1341
+ rich_text: NotionAITool.parseBasicMarkdown(cleanContent),
1342
+ },
1343
+ });
1344
+ }
1345
+ }
1346
+ });
1347
+ }
988
1348
  // Helper function to get callout emoji based on type
989
1349
  static getCalloutEmoji(type) {
990
1350
  const emojiMap = {