@uniweb/semantic-parser 1.0.12 → 1.0.13
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/src/processors/sequence.js +116 -26
package/package.json
CHANGED
|
@@ -62,24 +62,19 @@ function processSequence(doc, options = {}) {
|
|
|
62
62
|
|
|
63
63
|
/**
 * Append the sequence elements of a node's direct children to `sequence`.
 *
 * Each child is converted with `createSequenceElement`. Falsy results are
 * skipped; an array result (e.g. a paragraph made only of links) contributes
 * every entry, any other result contributes a single entry.
 *
 * @param {Object} node - Node whose `content` array is walked.
 * @param {Array} sequence - Output array, mutated in place.
 * @param {Object} options - Passed through to `createSequenceElement`.
 */
function processNode(node, sequence, options) {
    const children = node.content;

    // Nothing to walk unless the node carries an array of children
    if (!Array.isArray(children)) return;

    children.forEach((child) => {
        const produced = createSequenceElement(child, options);
        if (!produced) return;

        // Normalise to a list so single and multi-element results share one path
        const items = Array.isArray(produced) ? produced : [produced];
        sequence.push(...items);
    });
}
|
|
84
79
|
|
|
85
80
|
function createSequenceElement(node, options = {}) {
|
|
@@ -95,6 +90,12 @@ function createSequenceElement(node, options = {}) {
|
|
|
95
90
|
};
|
|
96
91
|
}
|
|
97
92
|
|
|
93
|
+
// Check for paragraph containing only multiple links (no other text)
|
|
94
|
+
const multipleLinks = isOnlyLinks(node);
|
|
95
|
+
if (multipleLinks) {
|
|
96
|
+
return multipleLinks; // Returns array of link elements
|
|
97
|
+
}
|
|
98
|
+
|
|
98
99
|
const styledLink = isStyledLink(node);
|
|
99
100
|
|
|
100
101
|
if (styledLink) return styledLink;
|
|
@@ -434,7 +435,7 @@ function parseDocumentBlock(itemAttrs) {
|
|
|
434
435
|
}
|
|
435
436
|
|
|
436
437
|
function parseUniwebIcon(itemAttrs) {
|
|
437
|
-
let { svg, url, size, color, preserveColors } = itemAttrs;
|
|
438
|
+
let { svg, url, size, color, preserveColors, href, target } = itemAttrs || {};
|
|
438
439
|
|
|
439
440
|
return {
|
|
440
441
|
svg,
|
|
@@ -442,6 +443,8 @@ function parseUniwebIcon(itemAttrs) {
|
|
|
442
443
|
size,
|
|
443
444
|
color,
|
|
444
445
|
preserveColors,
|
|
446
|
+
href,
|
|
447
|
+
target,
|
|
445
448
|
};
|
|
446
449
|
}
|
|
447
450
|
|
|
@@ -461,12 +464,13 @@ function parseImgBlock(itemAttrs) {
|
|
|
461
464
|
alt = "",
|
|
462
465
|
url,
|
|
463
466
|
href = "",
|
|
467
|
+
target = "",
|
|
464
468
|
theme,
|
|
465
469
|
role,
|
|
466
470
|
credit = "",
|
|
467
471
|
} = itemAttrs;
|
|
468
472
|
|
|
469
|
-
let { contentType, viewType, contentId, identifier } = imgInfo;
|
|
473
|
+
let { contentType, viewType, contentId, identifier } = imgInfo || {};
|
|
470
474
|
|
|
471
475
|
const sizes = {
|
|
472
476
|
center: "basic",
|
|
@@ -493,6 +497,7 @@ function parseImgBlock(itemAttrs) {
|
|
|
493
497
|
imgPos: direction === "left" || direction === "right" ? direction : "",
|
|
494
498
|
size: sizes[direction] || "basic",
|
|
495
499
|
href,
|
|
500
|
+
target,
|
|
496
501
|
theme,
|
|
497
502
|
role,
|
|
498
503
|
credit,
|
|
@@ -507,6 +512,8 @@ function parseVideoBlock(itemAttrs) {
|
|
|
507
512
|
info = {},
|
|
508
513
|
coverImg = {},
|
|
509
514
|
alt,
|
|
515
|
+
href = "",
|
|
516
|
+
target = "",
|
|
510
517
|
} = itemAttrs;
|
|
511
518
|
|
|
512
519
|
let video = makeAssetUrl({
|
|
@@ -520,6 +527,8 @@ function parseVideoBlock(itemAttrs) {
|
|
|
520
527
|
direction,
|
|
521
528
|
coverImg: makeAssetUrl(coverImg),
|
|
522
529
|
alt,
|
|
530
|
+
href,
|
|
531
|
+
target,
|
|
523
532
|
};
|
|
524
533
|
}
|
|
525
534
|
|
|
@@ -539,35 +548,64 @@ function stripTags(htmlString) {
|
|
|
539
548
|
}
|
|
540
549
|
|
|
541
550
|
function isLink(item) {
|
|
542
|
-
//
|
|
551
|
+
// Detect paragraphs/headings that are semantically "just a link"
|
|
552
|
+
// (single link text, possibly with decorative icons)
|
|
553
|
+
//
|
|
554
|
+
// For single-link paragraphs, the icon-link association is unambiguous:
|
|
555
|
+
// - Icons before the link text → iconBefore
|
|
556
|
+
// - Icons after the link text → iconAfter
|
|
557
|
+
//
|
|
558
|
+
// This supports natural content authoring: insert icon, type link text, add href
|
|
543
559
|
if (["paragraph", "heading"].includes(item.type)) {
|
|
544
|
-
|
|
560
|
+
const originalContent = item?.content || [];
|
|
545
561
|
|
|
546
|
-
//
|
|
547
|
-
|
|
562
|
+
// Filter out icons and whitespace to check for single link
|
|
563
|
+
const textContent = originalContent.filter((c) => {
|
|
548
564
|
if (c.type === "UniwebIcon") {
|
|
549
565
|
return false;
|
|
550
566
|
} else if (c.type === "text") {
|
|
551
567
|
return (c.text || "").trim() !== "";
|
|
552
568
|
}
|
|
553
|
-
|
|
554
569
|
return true;
|
|
555
570
|
});
|
|
556
571
|
|
|
557
|
-
if (
|
|
558
|
-
let contentItem =
|
|
572
|
+
if (textContent.length === 1) {
|
|
573
|
+
let contentItem = textContent[0];
|
|
559
574
|
let marks = contentItem?.marks || [];
|
|
560
575
|
|
|
561
576
|
for (let l = 0; l < marks.length; l++) {
|
|
562
577
|
let mark = marks[l];
|
|
563
578
|
|
|
564
|
-
|
|
579
|
+
if (mark?.type === "link") {
|
|
580
|
+
// Find the position of the link text in the original content
|
|
581
|
+
const linkIndex = originalContent.findIndex(
|
|
582
|
+
(c) => c.type === "text" && c.text === contentItem.text
|
|
583
|
+
);
|
|
584
|
+
|
|
585
|
+
// Collect icons before and after the link text
|
|
586
|
+
let iconBefore = null;
|
|
587
|
+
let iconAfter = null;
|
|
588
|
+
|
|
589
|
+
for (let i = 0; i < originalContent.length; i++) {
|
|
590
|
+
if (originalContent[i].type === "UniwebIcon") {
|
|
591
|
+
const iconAttrs = parseUniwebIcon(originalContent[i].attrs);
|
|
592
|
+
if (i < linkIndex) {
|
|
593
|
+
// Take the last icon before the link
|
|
594
|
+
iconBefore = iconAttrs;
|
|
595
|
+
} else if (i > linkIndex) {
|
|
596
|
+
// Take the first icon after the link
|
|
597
|
+
if (!iconAfter) iconAfter = iconAttrs;
|
|
598
|
+
}
|
|
599
|
+
}
|
|
600
|
+
}
|
|
565
601
|
|
|
566
|
-
if (markType === "link") {
|
|
567
602
|
return {
|
|
568
603
|
href: mark?.attrs?.href,
|
|
569
604
|
label: contentItem?.text || "",
|
|
570
|
-
|
|
605
|
+
iconBefore,
|
|
606
|
+
iconAfter,
|
|
607
|
+
// Preserve all inline elements for advanced rendering
|
|
608
|
+
children: processInlineElements(originalContent),
|
|
571
609
|
};
|
|
572
610
|
}
|
|
573
611
|
}
|
|
@@ -577,6 +615,58 @@ function isLink(item) {
|
|
|
577
615
|
return false;
|
|
578
616
|
}
|
|
579
617
|
|
|
618
|
+
/**
 * Check if a paragraph contains ONLY links (two or more, no other text).
 * If so, return an array of link elements to be added to the sequence separately.
 *
 * This handles the common pattern of writing links on consecutive lines:
 * ```
 * [Privacy Policy](/privacy)
 * [Terms of Service](/terms)
 * ```
 * Markdown treats these as a single paragraph, but semantically they're separate links.
 *
 * A single link whose text is split into several adjacent text nodes (same href,
 * different inline styles such as bold + plain) counts as ONE link: the adjacent
 * parts are merged into one label. A paragraph that ends up with fewer than two
 * links returns false, so the single-link and styled-link checks can handle it.
 *
 * Icons are skipped here; they are not attached to any of the returned links.
 *
 * @param {Object} item - Sequence item (paragraph)
 * @returns {Array|false} Array of `{ type: "link", attrs: { href, label } }` or false
 */
function isOnlyLinks(item) {
    if (item?.type !== "paragraph") return false;

    const content = Array.isArray(item.content) ? item.content : [];
    if (!content.length) return false;

    const links = [];
    // Link currently being accumulated; reset whenever something separates two links
    let openLink = null;

    for (const node of content) {
        if (node?.type === "UniwebIcon") {
            openLink = null;
            continue;
        }

        // Anything other than text (or an icon) means this is not a links-only paragraph
        if (node?.type !== "text") return false;

        const text = node.text || "";
        const isBlank = text.trim() === "";
        const linkMark = node.marks?.find((m) => m?.type === "link");

        if (!linkMark) {
            // Plain text with real content disqualifies the paragraph;
            // plain whitespace only separates one link from the next
            if (!isBlank) return false;
            openLink = null;
            continue;
        }

        const href = linkMark.attrs?.href;

        // Adjacent part of the same link (e.g. a differently styled segment): extend it
        if (openLink && openLink.attrs.href === href) {
            openLink.attrs.label += text;
            continue;
        }

        // Whitespace that does not continue a link is not a link of its own
        if (isBlank) {
            openLink = null;
            continue;
        }

        openLink = { type: "link", attrs: { href, label: text } };
        links.push(openLink);
    }

    // Zero or one link is left to the single-link / styled-link checks
    return links.length >= 2 ? links : false;
}
|
|
669
|
+
|
|
580
670
|
// method to check if given item has multiple content parts and each of them has the same link attrs with different inline style (plain, em, strong, u)
|
|
581
671
|
// if so, it will return the link attrs and all the content parts whose link mark has been removed
|
|
582
672
|
// warning: This method will not work if any of the content parts are not link marks
|