n8n-nodes-notion-advanced 1.2.7-beta → 1.2.9-beta
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/nodes/NotionAdvanced/NotionAITool.node.d.ts +40 -1
- package/dist/nodes/NotionAdvanced/NotionAITool.node.js +480 -120
- package/dist/package.json +1 -1
- package/package.json +1 -1
- package/dist/nodes/NotionAdvanced/NotionAITool.node.ts +0 -611
- package/dist/nodes/NotionAdvanced/NotionAdvanced.node.ts +0 -1022
- package/dist/nodes/NotionAdvanced/NotionUtils.ts +0 -588
- package/dist/nodes/NotionAdvanced/notion.svg +0 -3
- package/dist/types/NotionTypes.ts +0 -411
@@ -2,6 +2,7 @@
|
|
2
2
|
Object.defineProperty(exports, "__esModule", { value: true });
|
3
3
|
exports.NotionAITool = void 0;
|
4
4
|
const n8n_workflow_1 = require("n8n-workflow");
|
5
|
+
const crypto_1 = require("crypto");
|
5
6
|
const NotionUtils_1 = require("./NotionUtils");
|
6
7
|
class NotionAITool {
|
7
8
|
constructor() {
|
@@ -455,8 +456,8 @@ class NotionAITool {
|
|
455
456
|
for (let i = 0; i < lines.length; i++) {
|
456
457
|
const line = lines[i];
|
457
458
|
const trimmedLine = line.trim();
|
458
|
-
// Skip completely empty lines and XML placeholders
|
459
|
-
if (!trimmedLine || trimmedLine
|
459
|
+
// Skip completely empty lines and XML placeholders (now using dynamic prefix check)
|
460
|
+
if (!trimmedLine || /__XML_[a-f0-9]{8}_\d+__/.test(trimmedLine))
|
460
461
|
continue;
|
461
462
|
// Traditional markdown patterns (for backwards compatibility)
|
462
463
|
if (trimmedLine.startsWith('# ')) {
|
@@ -641,328 +642,557 @@ class NotionAITool {
|
|
641
642
|
}
|
642
643
|
return blocks;
|
643
644
|
}
|
644
|
-
//
|
645
|
+
// Helper function to resolve overlapping tag matches
|
646
|
+
static resolveOverlaps(matches) {
|
647
|
+
const resolved = [];
|
648
|
+
const sorted = matches.sort((a, b) => {
|
649
|
+
if (a.start !== b.start)
|
650
|
+
return a.start - b.start;
|
651
|
+
return (b.end - b.start) - (a.end - a.start); // Prefer longer matches
|
652
|
+
});
|
653
|
+
for (const match of sorted) {
|
654
|
+
const hasOverlap = resolved.some(existing => (match.start < existing.end && match.end > existing.start));
|
655
|
+
if (!hasOverlap) {
|
656
|
+
resolved.push(match);
|
657
|
+
}
|
658
|
+
}
|
659
|
+
return resolved;
|
660
|
+
}
|
661
|
+
// Helper function to validate XML tag structure
|
662
|
+
static validateXmlTag(match, tagName) {
|
663
|
+
try {
|
664
|
+
// Basic validation for well-formed tags
|
665
|
+
const openTag = new RegExp(`<${tagName}[^>]*>`, 'i');
|
666
|
+
const closeTag = new RegExp(`</${tagName}>`, 'i');
|
667
|
+
if (!openTag.test(match) || !closeTag.test(match)) {
|
668
|
+
console.warn(`Malformed XML tag detected: ${match.substring(0, 50)}...`);
|
669
|
+
return false;
|
670
|
+
}
|
671
|
+
return true;
|
672
|
+
}
|
673
|
+
catch (error) {
|
674
|
+
console.warn(`Error validating XML tag: ${error}`);
|
675
|
+
return false;
|
676
|
+
}
|
677
|
+
}
|
678
|
+
// Helper function for optimized string replacement
|
679
|
+
static optimizedReplace(content, matches) {
|
680
|
+
if (matches.length === 0)
|
681
|
+
return content;
|
682
|
+
const parts = [];
|
683
|
+
let lastIndex = 0;
|
684
|
+
matches.forEach(({ start, end, replacement }) => {
|
685
|
+
parts.push(content.substring(lastIndex, start));
|
686
|
+
parts.push(replacement);
|
687
|
+
lastIndex = end;
|
688
|
+
});
|
689
|
+
parts.push(content.substring(lastIndex));
|
690
|
+
return parts.join('');
|
691
|
+
}
|
692
|
+
// Helper function for Unicode-safe position calculation
|
693
|
+
static getUtf8BytePosition(str, charIndex) {
|
694
|
+
try {
|
695
|
+
return Buffer.from(str.substring(0, charIndex), 'utf8').length;
|
696
|
+
}
|
697
|
+
catch (error) {
|
698
|
+
// Fallback to character index if Buffer operations fail
|
699
|
+
return charIndex;
|
700
|
+
}
|
701
|
+
}
|
702
|
+
// Build hierarchical XML tree structure
|
703
|
+
static buildXMLTree(content, tagProcessors) {
|
704
|
+
const allMatches = [];
|
705
|
+
// Collect all XML tags with their positions
|
706
|
+
tagProcessors.forEach(({ regex, blockCreator, listProcessor }) => {
|
707
|
+
var _a;
|
708
|
+
const globalRegex = new RegExp(regex.source, 'gis');
|
709
|
+
let match;
|
710
|
+
while ((match = globalRegex.exec(content)) !== null) {
|
711
|
+
const tagName = ((_a = match[0].match(/<(\w+)/)) === null || _a === void 0 ? void 0 : _a[1]) || 'unknown';
|
712
|
+
allMatches.push({
|
713
|
+
id: `${tagName}_${match.index}_${Date.now()}_${Math.random()}`,
|
714
|
+
tagName,
|
715
|
+
start: match.index,
|
716
|
+
end: match.index + match[0].length,
|
717
|
+
match: match[0],
|
718
|
+
processor: blockCreator,
|
719
|
+
groups: match.slice(1),
|
720
|
+
children: [],
|
721
|
+
depth: 0,
|
722
|
+
innerContent: match[0],
|
723
|
+
replacement: undefined,
|
724
|
+
listProcessor
|
725
|
+
});
|
726
|
+
}
|
727
|
+
});
|
728
|
+
// Sort by start position
|
729
|
+
allMatches.sort((a, b) => a.start - b.start);
|
730
|
+
// Build parent-child relationships
|
731
|
+
const rootNodes = [];
|
732
|
+
const nodeStack = [];
|
733
|
+
for (const node of allMatches) {
|
734
|
+
// Pop nodes from stack that don't contain this node
|
735
|
+
while (nodeStack.length > 0 && nodeStack[nodeStack.length - 1].end <= node.start) {
|
736
|
+
nodeStack.pop();
|
737
|
+
}
|
738
|
+
// Set depth based on stack size
|
739
|
+
node.depth = nodeStack.length;
|
740
|
+
// If there's a parent on the stack, add this as its child
|
741
|
+
if (nodeStack.length > 0) {
|
742
|
+
const parent = nodeStack[nodeStack.length - 1];
|
743
|
+
node.parent = parent;
|
744
|
+
parent.children.push(node);
|
745
|
+
}
|
746
|
+
else {
|
747
|
+
// This is a root node
|
748
|
+
rootNodes.push(node);
|
749
|
+
}
|
750
|
+
// Only push self-contained tags to stack (not self-closing)
|
751
|
+
if (!node.match.endsWith('/>') && node.match.includes('</')) {
|
752
|
+
nodeStack.push(node);
|
753
|
+
}
|
754
|
+
}
|
755
|
+
return rootNodes;
|
756
|
+
}
|
757
|
+
// Process XML tree depth-first (children before parents)
|
758
|
+
static processXMLTreeDepthFirst(nodes, blocks, placeholderPrefix) {
|
759
|
+
const replacements = new Map();
|
760
|
+
let blockCounter = 0;
|
761
|
+
const processNode = (node) => {
|
762
|
+
// First, process all children depth-first
|
763
|
+
for (const child of node.children) {
|
764
|
+
const childReplacement = processNode(child);
|
765
|
+
replacements.set(child.id, childReplacement);
|
766
|
+
}
|
767
|
+
// Extract inner content (content between opening and closing tags)
|
768
|
+
let innerContent = node.innerContent;
|
769
|
+
// Extract content between opening and closing tags
|
770
|
+
const openTagMatch = node.match.match(/^<[^>]+>/);
|
771
|
+
const closeTagMatch = node.match.match(/<\/[^>]+>$/);
|
772
|
+
if (openTagMatch && closeTagMatch) {
|
773
|
+
const openTag = openTagMatch[0];
|
774
|
+
const closeTag = closeTagMatch[0];
|
775
|
+
const startIndex = node.match.indexOf(openTag) + openTag.length;
|
776
|
+
const endIndex = node.match.lastIndexOf(closeTag);
|
777
|
+
innerContent = node.match.substring(startIndex, endIndex);
|
778
|
+
// Replace child nodes in inner content with their processed content
|
779
|
+
for (const child of node.children) {
|
780
|
+
const childReplacement = replacements.get(child.id) || '';
|
781
|
+
innerContent = innerContent.replace(child.match, childReplacement);
|
782
|
+
}
|
783
|
+
}
|
784
|
+
// Process this node with updated inner content
|
785
|
+
try {
|
786
|
+
// Handle special list processors
|
787
|
+
if (node.listProcessor && (node.tagName === 'ul' || node.tagName === 'ol')) {
|
788
|
+
node.listProcessor(innerContent, blocks);
|
789
|
+
return `${placeholderPrefix}${blockCounter++}__`;
|
790
|
+
}
|
791
|
+
// Use blockCreator to create the block
|
792
|
+
const block = node.processor(...node.groups);
|
793
|
+
if (block) {
|
794
|
+
blocks.push(block);
|
795
|
+
}
|
796
|
+
return `${placeholderPrefix}${blockCounter++}__`;
|
797
|
+
}
|
798
|
+
catch (error) {
|
799
|
+
console.warn(`Error processing XML node ${node.tagName}:`, error);
|
800
|
+
return node.match; // Return original if processing fails
|
801
|
+
}
|
802
|
+
};
|
803
|
+
// Process all root nodes
|
804
|
+
for (const rootNode of nodes) {
|
805
|
+
const replacement = processNode(rootNode);
|
806
|
+
replacements.set(rootNode.id, replacement);
|
807
|
+
}
|
808
|
+
return replacements;
|
809
|
+
}
|
810
|
+
// Apply hierarchical replacements to content
|
811
|
+
static applyHierarchicalReplacements(content, nodes, replacements) {
|
812
|
+
let processedContent = content;
|
813
|
+
// Sort nodes by start position in reverse order to avoid position shifts
|
814
|
+
const allNodes = this.getAllNodesFromTree(nodes);
|
815
|
+
allNodes.sort((a, b) => b.start - a.start);
|
816
|
+
// Apply replacements from end to beginning
|
817
|
+
for (const node of allNodes) {
|
818
|
+
const replacement = replacements.get(node.id);
|
819
|
+
if (replacement !== undefined) {
|
820
|
+
processedContent = processedContent.substring(0, node.start) +
|
821
|
+
replacement +
|
822
|
+
processedContent.substring(node.end);
|
823
|
+
}
|
824
|
+
}
|
825
|
+
return processedContent;
|
826
|
+
}
|
827
|
+
// Helper function to get all nodes from tree (flattened)
|
828
|
+
static getAllNodesFromTree(nodes) {
|
829
|
+
const allNodes = [];
|
830
|
+
const collectNodes = (nodeList) => {
|
831
|
+
for (const node of nodeList) {
|
832
|
+
allNodes.push(node);
|
833
|
+
collectNodes(node.children);
|
834
|
+
}
|
835
|
+
};
|
836
|
+
collectNodes(nodes);
|
837
|
+
return allNodes;
|
838
|
+
}
|
839
|
+
// New hierarchical XML-like tag processing function
|
645
840
|
static processXmlTags(content, blocks) {
|
646
841
|
let processedContent = content;
|
647
|
-
|
648
|
-
|
842
|
+
// Generate unique placeholder prefix to avoid collisions
|
843
|
+
const placeholderPrefix = `__XML_${(0, crypto_1.randomUUID)().slice(0, 8)}_`;
|
844
|
+
// Debug mode for development
|
845
|
+
const DEBUG_ORDERING = process.env.NODE_ENV === 'development';
|
846
|
+
// Define all tag processors
|
649
847
|
const tagProcessors = [
|
650
848
|
// Callouts: <callout type="info">content</callout>
|
651
849
|
{
|
652
850
|
regex: /<callout\s*(?:type="([^"]*)")?\s*>(.*?)<\/callout>/gis,
|
653
|
-
|
851
|
+
blockCreator: (type = 'info', content) => {
|
654
852
|
const emoji = NotionAITool.getCalloutEmoji(type.toLowerCase());
|
655
853
|
const color = NotionAITool.getCalloutColor(type.toLowerCase());
|
656
|
-
|
854
|
+
return {
|
657
855
|
type: 'callout',
|
658
856
|
callout: {
|
659
857
|
rich_text: NotionAITool.parseBasicMarkdown(content.trim()),
|
660
858
|
icon: { type: 'emoji', emoji },
|
661
859
|
color: color,
|
662
860
|
},
|
663
|
-
}
|
664
|
-
return `__XML_BLOCK_${blockCounter++}__`;
|
861
|
+
};
|
665
862
|
}
|
666
863
|
},
|
667
864
|
// Code blocks: <code language="javascript">content</code>
|
668
865
|
{
|
669
866
|
regex: /<code\s*(?:language="([^"]*)")?\s*>(.*?)<\/code>/gis,
|
670
|
-
|
671
|
-
|
867
|
+
blockCreator: (language = 'plain_text', content) => {
|
868
|
+
return {
|
672
869
|
type: 'code',
|
673
870
|
code: {
|
674
871
|
rich_text: [(0, NotionUtils_1.createRichText)(content.trim())],
|
675
872
|
language: language === 'plain text' ? 'plain_text' : language,
|
676
873
|
},
|
677
|
-
}
|
678
|
-
return `__XML_BLOCK_${blockCounter++}__`;
|
874
|
+
};
|
679
875
|
}
|
680
876
|
},
|
681
877
|
// Images: <image src="url" alt="description">caption</image>
|
682
878
|
{
|
683
879
|
regex: /<image\s+src="([^"]*)"(?:\s+alt="([^"]*)")?\s*>(.*?)<\/image>/gis,
|
684
|
-
|
880
|
+
blockCreator: (src, alt = '', caption = '') => {
|
685
881
|
const captionText = caption.trim() || alt;
|
686
|
-
|
882
|
+
return {
|
687
883
|
type: 'image',
|
688
884
|
image: {
|
689
885
|
type: 'external',
|
690
886
|
external: { url: src },
|
691
887
|
caption: captionText ? NotionAITool.parseBasicMarkdown(captionText) : [],
|
692
888
|
},
|
693
|
-
}
|
694
|
-
return `__XML_BLOCK_${blockCounter++}__`;
|
889
|
+
};
|
695
890
|
}
|
696
891
|
},
|
697
892
|
// Self-closing images: <image src="url" alt="description"/>
|
698
893
|
{
|
699
894
|
regex: /<image\s+src="([^"]*)"(?:\s+alt="([^"]*)")?\s*\/>/gis,
|
700
|
-
|
701
|
-
|
895
|
+
blockCreator: (src, alt = '') => {
|
896
|
+
return {
|
702
897
|
type: 'image',
|
703
898
|
image: {
|
704
899
|
type: 'external',
|
705
900
|
external: { url: src },
|
706
901
|
caption: alt ? NotionAITool.parseBasicMarkdown(alt) : [],
|
707
902
|
},
|
708
|
-
}
|
709
|
-
return `__XML_BLOCK_${blockCounter++}__`;
|
903
|
+
};
|
710
904
|
}
|
711
905
|
},
|
712
906
|
// Equations: <equation>E=mc^2</equation>
|
713
907
|
{
|
714
908
|
regex: /<equation>(.*?)<\/equation>/gis,
|
715
|
-
|
716
|
-
|
909
|
+
blockCreator: (expression) => {
|
910
|
+
return {
|
717
911
|
type: 'equation',
|
718
912
|
equation: {
|
719
913
|
expression: expression.trim(),
|
720
914
|
},
|
721
|
-
}
|
722
|
-
return `__XML_BLOCK_${blockCounter++}__`;
|
915
|
+
};
|
723
916
|
}
|
724
917
|
},
|
725
918
|
// Embeds: <embed>url</embed>
|
726
919
|
{
|
727
920
|
regex: /<embed>(.*?)<\/embed>/gis,
|
728
|
-
|
729
|
-
|
921
|
+
blockCreator: (url) => {
|
922
|
+
return {
|
730
923
|
type: 'embed',
|
731
924
|
embed: {
|
732
925
|
url: url.trim(),
|
733
926
|
},
|
734
|
-
}
|
735
|
-
return `__XML_BLOCK_${blockCounter++}__`;
|
927
|
+
};
|
736
928
|
}
|
737
929
|
},
|
738
930
|
// Bookmarks: <bookmark>url</bookmark>
|
739
931
|
{
|
740
932
|
regex: /<bookmark>(.*?)<\/bookmark>/gis,
|
741
|
-
|
742
|
-
|
933
|
+
blockCreator: (url) => {
|
934
|
+
return {
|
743
935
|
type: 'bookmark',
|
744
936
|
bookmark: {
|
745
937
|
url: url.trim(),
|
746
938
|
},
|
747
|
-
}
|
748
|
-
return `__XML_BLOCK_${blockCounter++}__`;
|
939
|
+
};
|
749
940
|
}
|
750
941
|
},
|
751
942
|
// Toggles: <toggle>title</toggle>
|
752
943
|
{
|
753
944
|
regex: /<toggle>(.*?)<\/toggle>/gis,
|
754
|
-
|
755
|
-
|
945
|
+
blockCreator: (title) => {
|
946
|
+
return {
|
756
947
|
type: 'toggle',
|
757
948
|
toggle: {
|
758
949
|
rich_text: NotionAITool.parseBasicMarkdown(title.trim()),
|
759
950
|
children: [],
|
760
951
|
},
|
761
|
-
}
|
762
|
-
return `__XML_BLOCK_${blockCounter++}__`;
|
952
|
+
};
|
763
953
|
}
|
764
954
|
},
|
765
955
|
// Quotes: <quote>content</quote>
|
766
956
|
{
|
767
957
|
regex: /<quote>(.*?)<\/quote>/gis,
|
768
|
-
|
769
|
-
|
958
|
+
blockCreator: (content) => {
|
959
|
+
return {
|
770
960
|
type: 'quote',
|
771
961
|
quote: {
|
772
962
|
rich_text: NotionAITool.parseBasicMarkdown(content.trim()),
|
773
963
|
},
|
774
|
-
}
|
775
|
-
return `__XML_BLOCK_${blockCounter++}__`;
|
964
|
+
};
|
776
965
|
}
|
777
966
|
},
|
778
967
|
// Dividers: <divider/> or <divider></divider>
|
779
968
|
{
|
780
969
|
regex: /<divider\s*\/?>/gis,
|
781
|
-
|
782
|
-
|
970
|
+
blockCreator: () => {
|
971
|
+
return {
|
783
972
|
type: 'divider',
|
784
973
|
divider: {},
|
785
|
-
}
|
786
|
-
return `__XML_BLOCK_${blockCounter++}__`;
|
974
|
+
};
|
787
975
|
}
|
788
976
|
},
|
789
977
|
// To-do items: <todo checked="true">content</todo>
|
790
978
|
{
|
791
979
|
regex: /<todo\s*(?:checked="([^"]*)")?\s*>(.*?)<\/todo>/gis,
|
792
|
-
|
980
|
+
blockCreator: (checked = 'false', content) => {
|
793
981
|
const isChecked = checked.toLowerCase() === 'true';
|
794
|
-
|
982
|
+
return {
|
795
983
|
type: 'to_do',
|
796
984
|
to_do: {
|
797
985
|
rich_text: NotionAITool.parseBasicMarkdown(content.trim()),
|
798
986
|
checked: isChecked,
|
799
987
|
},
|
800
|
-
}
|
801
|
-
return `__XML_BLOCK_${blockCounter++}__`;
|
988
|
+
};
|
802
989
|
}
|
803
990
|
},
|
804
991
|
// Headings: <h1>content</h1>, <h2>content</h2>, <h3>content</h3>
|
805
992
|
{
|
806
993
|
regex: /<h([123])>(.*?)<\/h[123]>/gis,
|
807
|
-
|
994
|
+
blockCreator: (level, content) => {
|
808
995
|
const headingType = `heading_${level}`;
|
809
|
-
|
996
|
+
return {
|
810
997
|
type: headingType,
|
811
998
|
[headingType]: {
|
812
999
|
rich_text: [(0, NotionUtils_1.createRichText)(content.trim())],
|
813
1000
|
},
|
814
|
-
}
|
815
|
-
return `__XML_BLOCK_${blockCounter++}__`;
|
1001
|
+
};
|
816
1002
|
}
|
817
1003
|
},
|
818
1004
|
// Paragraphs: <p>content</p>
|
819
1005
|
{
|
820
1006
|
regex: /<p>(.*?)<\/p>/gis,
|
821
|
-
|
822
|
-
|
1007
|
+
blockCreator: (content) => {
|
1008
|
+
return {
|
823
1009
|
type: 'paragraph',
|
824
1010
|
paragraph: {
|
825
1011
|
rich_text: NotionAITool.parseBasicMarkdown(content.trim()),
|
826
1012
|
},
|
827
|
-
}
|
828
|
-
return `__XML_BLOCK_${blockCounter++}__`;
|
1013
|
+
};
|
829
1014
|
}
|
830
1015
|
},
|
831
1016
|
// Process complete bulleted lists first: <ul><li>item</li></ul>
|
832
1017
|
{
|
833
1018
|
regex: /<ul\s*[^>]*>(.*?)<\/ul>/gis,
|
834
|
-
|
835
|
-
//
|
836
|
-
|
837
|
-
|
838
|
-
|
839
|
-
|
840
|
-
blocks.push({
|
841
|
-
type: 'bulleted_list_item',
|
842
|
-
bulleted_list_item: {
|
843
|
-
rich_text: NotionAITool.parseBasicMarkdown(itemContent),
|
844
|
-
},
|
845
|
-
});
|
846
|
-
}
|
847
|
-
});
|
848
|
-
return `__XML_BLOCK_${blockCounter++}__`;
|
1019
|
+
blockCreator: (listContent) => {
|
1020
|
+
// This will be handled specially in hierarchical processing
|
1021
|
+
return null;
|
1022
|
+
},
|
1023
|
+
listProcessor: (listContent, blocks) => {
|
1024
|
+
NotionAITool.processNestedList(listContent, 'bulleted_list_item', blocks);
|
849
1025
|
}
|
850
1026
|
},
|
851
1027
|
// Process complete numbered lists first: <ol><li>item</li></ol>
|
852
1028
|
{
|
853
1029
|
regex: /<ol\s*[^>]*>(.*?)<\/ol>/gis,
|
854
|
-
|
855
|
-
//
|
856
|
-
|
857
|
-
|
858
|
-
|
859
|
-
|
860
|
-
blocks.push({
|
861
|
-
type: 'numbered_list_item',
|
862
|
-
numbered_list_item: {
|
863
|
-
rich_text: NotionAITool.parseBasicMarkdown(itemContent),
|
864
|
-
},
|
865
|
-
});
|
866
|
-
}
|
867
|
-
});
|
868
|
-
return `__XML_BLOCK_${blockCounter++}__`;
|
1030
|
+
blockCreator: (listContent) => {
|
1031
|
+
// This will be handled specially in hierarchical processing
|
1032
|
+
return null;
|
1033
|
+
},
|
1034
|
+
listProcessor: (listContent, blocks) => {
|
1035
|
+
NotionAITool.processNestedList(listContent, 'numbered_list_item', blocks);
|
869
1036
|
}
|
870
1037
|
},
|
871
1038
|
// Blockquotes: <blockquote>content</blockquote>
|
872
1039
|
{
|
873
1040
|
regex: /<blockquote>(.*?)<\/blockquote>/gis,
|
874
|
-
|
875
|
-
|
1041
|
+
blockCreator: (content) => {
|
1042
|
+
return {
|
876
1043
|
type: 'quote',
|
877
1044
|
quote: {
|
878
1045
|
rich_text: NotionAITool.parseBasicMarkdown(content.trim()),
|
879
1046
|
},
|
880
|
-
}
|
881
|
-
return `__XML_BLOCK_${blockCounter++}__`;
|
1047
|
+
};
|
882
1048
|
}
|
883
1049
|
},
|
884
1050
|
// Preformatted text: <pre>content</pre>
|
885
1051
|
{
|
886
1052
|
regex: /<pre>(.*?)<\/pre>/gis,
|
887
|
-
|
888
|
-
|
1053
|
+
blockCreator: (content) => {
|
1054
|
+
return {
|
889
1055
|
type: 'code',
|
890
1056
|
code: {
|
891
1057
|
rich_text: [(0, NotionUtils_1.createRichText)(content.trim())],
|
892
1058
|
language: 'plain_text',
|
893
1059
|
},
|
894
|
-
}
|
895
|
-
return `__XML_BLOCK_${blockCounter++}__`;
|
1060
|
+
};
|
896
1061
|
}
|
897
1062
|
},
|
898
1063
|
// Standalone list items (only if not already processed in lists): <li>content</li>
|
899
1064
|
{
|
900
1065
|
regex: /<li\s*[^>]*>(.*?)<\/li>/gis,
|
901
|
-
|
1066
|
+
blockCreator: (content) => {
|
902
1067
|
if (content.trim()) {
|
903
|
-
|
1068
|
+
return {
|
904
1069
|
type: 'bulleted_list_item',
|
905
1070
|
bulleted_list_item: {
|
906
1071
|
rich_text: NotionAITool.parseBasicMarkdown(content.trim()),
|
907
1072
|
},
|
908
|
-
}
|
1073
|
+
};
|
909
1074
|
}
|
910
|
-
return
|
1075
|
+
return null;
|
911
1076
|
}
|
912
1077
|
},
|
913
1078
|
// Strong/Bold: <strong>content</strong> or <b>content</b> (only as standalone)
|
914
1079
|
{
|
915
1080
|
regex: /(?:^|>|\s)<(strong|b)>(.*?)<\/(strong|b)>(?=<|$|\s)/gis,
|
916
|
-
|
917
|
-
|
1081
|
+
blockCreator: (tag, content) => {
|
1082
|
+
return {
|
918
1083
|
type: 'paragraph',
|
919
1084
|
paragraph: {
|
920
1085
|
rich_text: NotionAITool.parseBasicMarkdown(`**${content.trim()}**`),
|
921
1086
|
},
|
922
|
-
}
|
923
|
-
return `__XML_BLOCK_${blockCounter++}__`;
|
1087
|
+
};
|
924
1088
|
}
|
925
1089
|
},
|
926
1090
|
// Emphasis/Italic: <em>content</em> or <i>content</i> (only as standalone)
|
927
1091
|
{
|
928
1092
|
regex: /(?:^|>|\s)<(em|i)>(.*?)<\/(em|i)>(?=<|$|\s)/gis,
|
929
|
-
|
930
|
-
|
1093
|
+
blockCreator: (tag, content) => {
|
1094
|
+
return {
|
931
1095
|
type: 'paragraph',
|
932
1096
|
paragraph: {
|
933
1097
|
rich_text: NotionAITool.parseBasicMarkdown(`*${content.trim()}*`),
|
934
1098
|
},
|
935
|
-
}
|
936
|
-
return `__XML_BLOCK_${blockCounter++}__`;
|
1099
|
+
};
|
937
1100
|
}
|
938
1101
|
},
|
939
1102
|
// Line breaks: <br/> or <br>
|
940
1103
|
{
|
941
1104
|
regex: /<br\s*\/?>/gis,
|
942
|
-
|
943
|
-
|
1105
|
+
blockCreator: () => {
|
1106
|
+
return {
|
944
1107
|
type: 'paragraph',
|
945
1108
|
paragraph: {
|
946
1109
|
rich_text: [(0, NotionUtils_1.createRichText)('')],
|
947
1110
|
},
|
948
|
-
}
|
949
|
-
return `__XML_BLOCK_${blockCounter++}__`;
|
1111
|
+
};
|
950
1112
|
}
|
951
1113
|
},
|
952
1114
|
];
|
953
|
-
|
954
|
-
|
955
|
-
|
956
|
-
|
1115
|
+
try {
|
1116
|
+
// Step 1: Build hierarchical XML tree
|
1117
|
+
const xmlTree = NotionAITool.buildXMLTree(processedContent, tagProcessors);
|
1118
|
+
if (DEBUG_ORDERING && xmlTree.length > 0) {
|
1119
|
+
console.log('XML Tree Structure:', xmlTree.map(node => ({
|
1120
|
+
tag: node.tagName,
|
1121
|
+
depth: node.depth,
|
1122
|
+
children: node.children.length,
|
1123
|
+
start: node.start
|
1124
|
+
})));
|
1125
|
+
}
|
1126
|
+
// Step 2: Process tree depth-first (children before parents)
|
1127
|
+
const replacements = NotionAITool.processXMLTreeDepthFirst(xmlTree, blocks, placeholderPrefix);
|
1128
|
+
// Step 3: Apply hierarchical replacements to content
|
1129
|
+
processedContent = NotionAITool.applyHierarchicalReplacements(processedContent, xmlTree, replacements);
|
1130
|
+
// Step 4: Clean up any remaining HTML tags
|
1131
|
+
processedContent = NotionAITool.cleanupRemainingHtml(processedContent, placeholderPrefix);
|
1132
|
+
if (DEBUG_ORDERING) {
|
1133
|
+
console.log(`Processed ${xmlTree.length} root XML nodes hierarchically, created ${blocks.length} blocks`);
|
1134
|
+
}
|
1135
|
+
}
|
1136
|
+
catch (error) {
|
1137
|
+
console.warn('Error in hierarchical XML processing, falling back to linear processing:', error);
|
1138
|
+
// Fallback to linear processing if hierarchical fails
|
1139
|
+
const allMatches = [];
|
1140
|
+
tagProcessors.forEach(({ regex, blockCreator }) => {
|
1141
|
+
const globalRegex = new RegExp(regex.source, 'gis');
|
1142
|
+
let match;
|
1143
|
+
while ((match = globalRegex.exec(processedContent)) !== null) {
|
1144
|
+
allMatches.push({
|
1145
|
+
start: match.index,
|
1146
|
+
end: match.index + match[0].length,
|
1147
|
+
match: match[0],
|
1148
|
+
processor: (match, group1, group2, group3) => {
|
1149
|
+
try {
|
1150
|
+
const block = blockCreator(group1 || '', group2 || '', group3 || '');
|
1151
|
+
if (block) {
|
1152
|
+
blocks.push(block);
|
1153
|
+
}
|
1154
|
+
return `${placeholderPrefix}${Math.random()}__`;
|
1155
|
+
}
|
1156
|
+
catch (error) {
|
1157
|
+
console.warn('Error in fallback processor:', error);
|
1158
|
+
return match;
|
1159
|
+
}
|
1160
|
+
},
|
1161
|
+
groups: match.slice(1)
|
1162
|
+
});
|
1163
|
+
}
|
957
1164
|
});
|
958
|
-
|
959
|
-
|
960
|
-
|
1165
|
+
const resolvedMatches = NotionAITool.resolveOverlaps(allMatches);
|
1166
|
+
resolvedMatches.sort((a, b) => a.start - b.start);
|
1167
|
+
const processedMatches = resolvedMatches.map(({ start, end, match, processor, groups }) => {
|
1168
|
+
try {
|
1169
|
+
const replacement = processor(match, groups[0] || '', groups[1] || '', groups[2] || '');
|
1170
|
+
return { start, end, replacement, match };
|
1171
|
+
}
|
1172
|
+
catch (error) {
|
1173
|
+
return { start, end, replacement: match, match };
|
1174
|
+
}
|
1175
|
+
});
|
1176
|
+
if (processedMatches.length > 0) {
|
1177
|
+
processedContent = NotionAITool.optimizedReplace(processedContent, processedMatches);
|
1178
|
+
}
|
1179
|
+
processedContent = NotionAITool.cleanupRemainingHtml(processedContent, placeholderPrefix);
|
1180
|
+
}
|
961
1181
|
return processedContent;
|
962
1182
|
}
|
963
|
-
// Cleanup function to remove remaining HTML tags
|
964
|
-
static cleanupRemainingHtml(content) {
|
1183
|
+
// Cleanup function to remove remaining HTML tags and XML_BLOCK artifacts
|
1184
|
+
static cleanupRemainingHtml(content, placeholderPrefix) {
|
965
1185
|
let cleaned = content;
|
1186
|
+
// Remove XML_BLOCK placeholder artifacts (support both old and new format)
|
1187
|
+
if (placeholderPrefix) {
|
1188
|
+
const placeholderRegex = new RegExp(`${placeholderPrefix.replace(/[.*+?^${}()|[\]\\]/g, '\\$&')}\\d+__`, 'g');
|
1189
|
+
cleaned = cleaned.replace(placeholderRegex, '');
|
1190
|
+
}
|
1191
|
+
else {
|
1192
|
+
// Fallback for backward compatibility
|
1193
|
+
cleaned = cleaned.replace(/__XML_BLOCK_\d+__/g, '');
|
1194
|
+
cleaned = cleaned.replace(/__XML_[a-f0-9]{8}_\d+__/g, '');
|
1195
|
+
}
|
966
1196
|
// Remove common HTML tags that might be left behind
|
967
1197
|
const htmlTagsToRemove = [
|
968
1198
|
/<\/?ul\s*[^>]*>/gi,
|
@@ -974,6 +1204,13 @@ class NotionAITool {
|
|
974
1204
|
/<\/?i\s*[^>]*>/gi,
|
975
1205
|
/<\/?div\s*[^>]*>/gi,
|
976
1206
|
/<\/?span\s*[^>]*>/gi,
|
1207
|
+
/<\/?p\s*[^>]*>/gi,
|
1208
|
+
/<\/?a\s*[^>]*>/gi,
|
1209
|
+
/<\/?code\s*[^>]*>/gi,
|
1210
|
+
/<\/?u\s*[^>]*>/gi,
|
1211
|
+
/<\/?s\s*[^>]*>/gi,
|
1212
|
+
/<\/?del\s*[^>]*>/gi,
|
1213
|
+
/<\/?mark\s*[^>]*>/gi,
|
977
1214
|
/<br\s*\/?>/gi,
|
978
1215
|
];
|
979
1216
|
htmlTagsToRemove.forEach(regex => {
|
@@ -983,8 +1220,131 @@ class NotionAITool {
|
|
983
1220
|
cleaned = cleaned.replace(/^\s*[\r\n]/gm, '');
|
984
1221
|
// Remove multiple consecutive line breaks
|
985
1222
|
cleaned = cleaned.replace(/\n{3,}/g, '\n\n');
|
1223
|
+
// Remove lines that contain only XML_BLOCK artifacts
|
1224
|
+
cleaned = cleaned.replace(/^.*__XML_BLOCK_\d+__.*$/gm, '');
|
1225
|
+
cleaned = cleaned.replace(/^.*__XML_[a-f0-9]{8}_\d+__.*$/gm, '');
|
986
1226
|
return cleaned.trim();
|
987
1227
|
}
|
1228
|
+
// Helper function to process nested HTML elements in list items
|
1229
|
+
static processNestedHtmlInListItem(content) {
|
1230
|
+
let processed = content;
|
1231
|
+
// First, remove wrapping <p> tags (common in nested content)
|
1232
|
+
processed = processed.replace(/^<p\s*[^>]*>(.*?)<\/p>$/gis, '$1');
|
1233
|
+
// Convert HTML formatting tags to markdown equivalents
|
1234
|
+
const htmlToMarkdown = [
|
1235
|
+
{ regex: /<strong\s*[^>]*>(.*?)<\/strong>/gis, replacement: '**$1**' },
|
1236
|
+
{ regex: /<b\s*[^>]*>(.*?)<\/b>/gis, replacement: '**$1**' },
|
1237
|
+
{ regex: /<em\s*[^>]*>(.*?)<\/em>/gis, replacement: '*$1*' },
|
1238
|
+
{ regex: /<i\s*[^>]*>(.*?)<\/i>/gis, replacement: '*$1*' },
|
1239
|
+
{ regex: /<code\s*[^>]*>(.*?)<\/code>/gis, replacement: '`$1`' },
|
1240
|
+
{ regex: /<a\s+href="([^"]*)"[^>]*>(.*?)<\/a>/gis, replacement: '[$2]($1)' },
|
1241
|
+
{ regex: /<u\s*[^>]*>(.*?)<\/u>/gis, replacement: '$1' }, // Notion doesn't support underline
|
1242
|
+
{ regex: /<s\s*[^>]*>(.*?)<\/s>/gis, replacement: '~~$1~~' },
|
1243
|
+
{ regex: /<del\s*[^>]*>(.*?)<\/del>/gis, replacement: '~~$1~~' },
|
1244
|
+
{ regex: /<mark\s*[^>]*>(.*?)<\/mark>/gis, replacement: '$1' }, // Notion doesn't support highlight in rich text
|
1245
|
+
];
|
1246
|
+
// Apply HTML to markdown conversions
|
1247
|
+
htmlToMarkdown.forEach(({ regex, replacement }) => {
|
1248
|
+
processed = processed.replace(regex, replacement);
|
1249
|
+
});
|
1250
|
+
// Remove any remaining HTML tags that we don't handle
|
1251
|
+
const tagsToRemove = [
|
1252
|
+
/<\/?div\s*[^>]*>/gi,
|
1253
|
+
/<\/?span\s*[^>]*>/gi,
|
1254
|
+
/<\/?p\s*[^>]*>/gi,
|
1255
|
+
/<br\s*\/?>/gi,
|
1256
|
+
];
|
1257
|
+
tagsToRemove.forEach(regex => {
|
1258
|
+
processed = processed.replace(regex, ' ');
|
1259
|
+
});
|
1260
|
+
// Clean up extra whitespace
|
1261
|
+
processed = processed.replace(/\s+/g, ' ').trim();
|
1262
|
+
return processed;
|
1263
|
+
}
|
1264
|
+
// Helper function to process nested lists and flatten them for Notion
|
1265
|
+
static processNestedList(listContent, listType, blocks) {
|
1266
|
+
// Extract top-level list items using a more careful approach
|
1267
|
+
const items = [];
|
1268
|
+
let currentPos = 0;
|
1269
|
+
while (currentPos < listContent.length) {
|
1270
|
+
const liStart = listContent.indexOf('<li', currentPos);
|
1271
|
+
if (liStart === -1)
|
1272
|
+
break;
|
1273
|
+
const liEndTag = listContent.indexOf('>', liStart);
|
1274
|
+
if (liEndTag === -1)
|
1275
|
+
break;
|
1276
|
+
// Find the matching closing </li> tag, accounting for nested content
|
1277
|
+
let depth = 1;
|
1278
|
+
let searchPos = liEndTag + 1;
|
1279
|
+
let liEnd = -1;
|
1280
|
+
while (searchPos < listContent.length && depth > 0) {
|
1281
|
+
const nextLiStart = listContent.indexOf('<li', searchPos);
|
1282
|
+
const nextLiEnd = listContent.indexOf('</li>', searchPos);
|
1283
|
+
if (nextLiEnd === -1)
|
1284
|
+
break;
|
1285
|
+
if (nextLiStart !== -1 && nextLiStart < nextLiEnd) {
|
1286
|
+
depth++;
|
1287
|
+
searchPos = nextLiStart + 3;
|
1288
|
+
}
|
1289
|
+
else {
|
1290
|
+
depth--;
|
1291
|
+
if (depth === 0) {
|
1292
|
+
liEnd = nextLiEnd;
|
1293
|
+
}
|
1294
|
+
searchPos = nextLiEnd + 5;
|
1295
|
+
}
|
1296
|
+
}
|
1297
|
+
if (liEnd === -1)
|
1298
|
+
break;
|
1299
|
+
// Extract the full <li>...</li> content
|
1300
|
+
const fullItem = listContent.substring(liStart, liEnd + 5);
|
1301
|
+
items.push(fullItem);
|
1302
|
+
currentPos = liEnd + 5;
|
1303
|
+
}
|
1304
|
+
// Process each top-level item
|
1305
|
+
items.forEach(item => {
|
1306
|
+
// Remove the outer <li> tags
|
1307
|
+
let itemContent = item.replace(/^<li[^>]*>/, '').replace(/<\/li>$/, '').trim();
|
1308
|
+
// Check if this item contains nested lists
|
1309
|
+
const hasNestedList = /<[uo]l\s*[^>]*>/i.test(itemContent);
|
1310
|
+
if (hasNestedList) {
|
1311
|
+
// Extract the text before the nested list
|
1312
|
+
const beforeNestedList = itemContent.replace(/<[uo]l\s*[^>]*>.*$/is, '').trim();
|
1313
|
+
if (beforeNestedList) {
|
1314
|
+
// Clean up and add the main item
|
1315
|
+
const cleanContent = NotionAITool.processNestedHtmlInListItem(beforeNestedList);
|
1316
|
+
if (cleanContent) {
|
1317
|
+
blocks.push({
|
1318
|
+
type: listType,
|
1319
|
+
[listType]: {
|
1320
|
+
rich_text: NotionAITool.parseBasicMarkdown(cleanContent),
|
1321
|
+
},
|
1322
|
+
});
|
1323
|
+
}
|
1324
|
+
}
|
1325
|
+
// Extract and process nested lists
|
1326
|
+
const nestedListMatch = itemContent.match(/<([uo]l)\s*[^>]*>(.*?)<\/\1>/is);
|
1327
|
+
if (nestedListMatch) {
|
1328
|
+
const [, nestedListTag, nestedContent] = nestedListMatch;
|
1329
|
+
const nestedListType = nestedListTag === 'ul' ? 'bulleted_list_item' : 'numbered_list_item';
|
1330
|
+
// Recursively process nested list
|
1331
|
+
NotionAITool.processNestedList(nestedContent, nestedListType, blocks);
|
1332
|
+
}
|
1333
|
+
}
|
1334
|
+
else {
|
1335
|
+
// Simple item without nested lists
|
1336
|
+
const cleanContent = NotionAITool.processNestedHtmlInListItem(itemContent);
|
1337
|
+
if (cleanContent) {
|
1338
|
+
blocks.push({
|
1339
|
+
type: listType,
|
1340
|
+
[listType]: {
|
1341
|
+
rich_text: NotionAITool.parseBasicMarkdown(cleanContent),
|
1342
|
+
},
|
1343
|
+
});
|
1344
|
+
}
|
1345
|
+
}
|
1346
|
+
});
|
1347
|
+
}
|
988
1348
|
// Helper function to get callout emoji based on type
|
989
1349
|
static getCalloutEmoji(type) {
|
990
1350
|
const emojiMap = {
|