@uniweb/semantic-parser 1.0.11 → 1.0.13

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/AGENTS.md CHANGED
@@ -93,9 +93,9 @@ The `data` object holds all structured content:
93
93
 
94
94
  ```js
95
95
  data: {
96
- "nav-links": [...], // From ```json:nav-links or ```yaml:nav-links
96
+ "nav-links": [...], // From ```yaml:nav-links
97
97
  "config": {...}, // From ```yaml:config
98
- "form": {...}, // From FormBlock editor widget
98
+ "form": {...}, // From FormBlock editor widget or ```yaml:form
99
99
  "person": [...], // From card-group with cardType="person"
100
100
  "event": [...] // From card-group with cardType="event"
101
101
  }
@@ -132,8 +132,11 @@ See `docs/entity-consolidation.md` for complete mapping documentation.
132
132
  Code blocks with tags route parsed data to the `data` object:
133
133
 
134
134
  ```markdown
135
- ```json:nav-links
136
- [{ "label": "Home", "href": "/" }]
135
+ ```yaml:nav-links
136
+ - label: Home
137
+ href: /
138
+ - label: About
139
+ href: /about
137
140
  ```
138
141
 
139
142
  ```yaml:config
@@ -142,6 +145,8 @@ theme: dark
142
145
  ```
143
146
  ```
144
147
 
148
+ JSON is also supported (`json:tag-name`) if you prefer.
149
+
145
150
  Results in:
146
151
  ```js
147
152
  content.data['nav-links'] = [{ label: "Home", href: "/" }]
@@ -154,11 +154,14 @@ fields:
154
154
  submitLabel: Subscribe
155
155
  ```
156
156
 
157
- ```json:nav-links
158
- [{ "label": "Home", "href": "/" }]
157
+ ```yaml:nav-links
158
+ - label: Home
159
+ href: /
159
160
  ```
160
161
  ```
161
162
 
163
+ JSON is also supported (`json:tag-name`) if you prefer.
164
+
162
165
  ---
163
166
 
164
167
  ## Editor Node Mappings
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@uniweb/semantic-parser",
3
- "version": "1.0.11",
3
+ "version": "1.0.13",
4
4
  "description": "Semantic parser for ProseMirror/TipTap content structures",
5
5
  "type": "module",
6
6
  "main": "./src/index.js",
@@ -62,24 +62,19 @@ function processSequence(doc, options = {}) {
62
62
 
63
63
  function processNode(node, sequence, options) {
64
64
  if (node.content && Array.isArray(node.content)) {
65
- // node.content?.forEach((child) => processNode(child, sequence, options));
66
- // return;
67
65
  node.content?.forEach((child) => {
68
- const element = createSequenceElement(child, options);
69
-
70
- if (element) {
71
- sequence.push(element);
66
+ const result = createSequenceElement(child, options);
67
+
68
+ if (result) {
69
+ // Handle case where element returns multiple items (e.g., paragraph with only links)
70
+ if (Array.isArray(result)) {
71
+ sequence.push(...result);
72
+ } else {
73
+ sequence.push(result);
74
+ }
72
75
  }
73
76
  });
74
77
  }
75
-
76
- // Create element based on node type
77
- // const element = createSequenceElement(node, options);
78
-
79
- // //Skip empty paragraph when create sequence
80
- // if (element) {
81
- // sequence.push(element);
82
- // }
83
78
  }
84
79
 
85
80
  function createSequenceElement(node, options = {}) {
@@ -95,6 +90,12 @@ function createSequenceElement(node, options = {}) {
95
90
  };
96
91
  }
97
92
 
93
+ // Check for paragraph containing only multiple links (no other text)
94
+ const multipleLinks = isOnlyLinks(node);
95
+ if (multipleLinks) {
96
+ return multipleLinks; // Returns array of link elements
97
+ }
98
+
98
99
  const styledLink = isStyledLink(node);
99
100
 
100
101
  if (styledLink) return styledLink;
@@ -434,7 +435,7 @@ function parseDocumentBlock(itemAttrs) {
434
435
  }
435
436
 
436
437
  function parseUniwebIcon(itemAttrs) {
437
- let { svg, url, size, color, preserveColors } = itemAttrs;
438
+ let { svg, url, size, color, preserveColors, href, target } = itemAttrs || {};
438
439
 
439
440
  return {
440
441
  svg,
@@ -442,6 +443,8 @@ function parseUniwebIcon(itemAttrs) {
442
443
  size,
443
444
  color,
444
445
  preserveColors,
446
+ href,
447
+ target,
445
448
  };
446
449
  }
447
450
 
@@ -461,12 +464,13 @@ function parseImgBlock(itemAttrs) {
461
464
  alt = "",
462
465
  url,
463
466
  href = "",
467
+ target = "",
464
468
  theme,
465
469
  role,
466
470
  credit = "",
467
471
  } = itemAttrs;
468
472
 
469
- let { contentType, viewType, contentId, identifier } = imgInfo;
473
+ let { contentType, viewType, contentId, identifier } = imgInfo || {};
470
474
 
471
475
  const sizes = {
472
476
  center: "basic",
@@ -493,6 +497,7 @@ function parseImgBlock(itemAttrs) {
493
497
  imgPos: direction === "left" || direction === "right" ? direction : "",
494
498
  size: sizes[direction] || "basic",
495
499
  href,
500
+ target,
496
501
  theme,
497
502
  role,
498
503
  credit,
@@ -507,6 +512,8 @@ function parseVideoBlock(itemAttrs) {
507
512
  info = {},
508
513
  coverImg = {},
509
514
  alt,
515
+ href = "",
516
+ target = "",
510
517
  } = itemAttrs;
511
518
 
512
519
  let video = makeAssetUrl({
@@ -520,6 +527,8 @@ function parseVideoBlock(itemAttrs) {
520
527
  direction,
521
528
  coverImg: makeAssetUrl(coverImg),
522
529
  alt,
530
+ href,
531
+ target,
523
532
  };
524
533
  }
525
534
 
@@ -539,35 +548,64 @@ function stripTags(htmlString) {
539
548
  }
540
549
 
541
550
  function isLink(item) {
542
- //For fast check, we only assume link in paragraph or heading
551
+ // Detect paragraphs/headings that are semantically "just a link"
552
+ // (single link text, possibly with decorative icons)
553
+ //
554
+ // For single-link paragraphs, the icon-link association is unambiguous:
555
+ // - Icons before the link text → iconBefore
556
+ // - Icons after the link text → iconAfter
557
+ //
558
+ // This supports natural content authoring: insert icon, type link text, add href
543
559
  if (["paragraph", "heading"].includes(item.type)) {
544
- let content = item?.content || [];
560
+ const originalContent = item?.content || [];
545
561
 
546
- //filter out icons
547
- content = content.filter((c) => {
562
+ // Filter out icons and whitespace to check for single link
563
+ const textContent = originalContent.filter((c) => {
548
564
  if (c.type === "UniwebIcon") {
549
565
  return false;
550
566
  } else if (c.type === "text") {
551
567
  return (c.text || "").trim() !== "";
552
568
  }
553
-
554
569
  return true;
555
570
  });
556
571
 
557
- if (content.length === 1) {
558
- let contentItem = content?.[0];
572
+ if (textContent.length === 1) {
573
+ let contentItem = textContent[0];
559
574
  let marks = contentItem?.marks || [];
560
575
 
561
576
  for (let l = 0; l < marks.length; l++) {
562
577
  let mark = marks[l];
563
578
 
564
- const markType = mark?.type;
579
+ if (mark?.type === "link") {
580
+ // Find the position of the link text in the original content
581
+ const linkIndex = originalContent.findIndex(
582
+ (c) => c.type === "text" && c.text === contentItem.text
583
+ );
584
+
585
+ // Collect icons before and after the link text
586
+ let iconBefore = null;
587
+ let iconAfter = null;
588
+
589
+ for (let i = 0; i < originalContent.length; i++) {
590
+ if (originalContent[i].type === "UniwebIcon") {
591
+ const iconAttrs = parseUniwebIcon(originalContent[i].attrs);
592
+ if (i < linkIndex) {
593
+ // Take the last icon before the link
594
+ iconBefore = iconAttrs;
595
+ } else if (i > linkIndex) {
596
+ // Take the first icon after the link
597
+ if (!iconAfter) iconAfter = iconAttrs;
598
+ }
599
+ }
600
+ }
565
601
 
566
- if (markType === "link") {
567
602
  return {
568
603
  href: mark?.attrs?.href,
569
604
  label: contentItem?.text || "",
570
- children: processInlineElements(content),
605
+ iconBefore,
606
+ iconAfter,
607
+ // Preserve all inline elements for advanced rendering
608
+ children: processInlineElements(originalContent),
571
609
  };
572
610
  }
573
611
  }
@@ -577,6 +615,58 @@ function isLink(item) {
577
615
  return false;
578
616
  }
579
617
 
618
+ /**
619
+ * Check if a paragraph contains ONLY links (multiple links, no other text)
620
+ * If so, return array of link data to be added to sequence separately.
621
+ *
622
+ * This handles the common pattern of writing links on consecutive lines:
623
+ * ```
624
+ * [Privacy Policy](/privacy)
625
+ * [Terms of Service](/terms)
626
+ * ```
627
+ * Markdown treats these as a single paragraph, but semantically they're separate links.
628
+ *
629
+ * @param {Object} item - Sequence item (paragraph)
630
+ * @returns {Array|false} Array of link objects or false
631
+ */
632
+ function isOnlyLinks(item) {
633
+ if (item.type !== "paragraph") return false;
634
+
635
+ const content = item?.content || [];
636
+ if (!content.length) return false;
637
+
638
+ // Filter to get only significant content (no icons, no whitespace)
639
+ const textContent = content.filter((c) => {
640
+ if (c.type === "UniwebIcon") return false;
641
+ if (c.type === "text" && !(c.text || "").trim()) return false;
642
+ return true;
643
+ });
644
+
645
+ if (textContent.length < 2) return false; // Single link handled by isLink
646
+
647
+ // Check if ALL remaining content items are text nodes with link marks
648
+ const allLinks = textContent.every((c) => {
649
+ if (c.type !== "text") return false;
650
+ const hasLinkMark = c.marks?.some((m) => m.type === "link");
651
+ return hasLinkMark;
652
+ });
653
+
654
+ if (!allLinks) return false;
655
+
656
+ // Extract links as simple {href, label} objects
657
+ // Icons in this paragraph go to body.icons separately (no association)
658
+ return textContent.map((c) => {
659
+ const linkMark = c.marks.find((m) => m.type === "link");
660
+ return {
661
+ type: "link",
662
+ attrs: {
663
+ href: linkMark?.attrs?.href,
664
+ label: c.text || "",
665
+ },
666
+ };
667
+ });
668
+ }
669
+
580
670
  // method to check if given item has multiple content parts and each of them has the same link attrs with different inline style (plain, em, strong, u)
581
671
  // if so, it will return the link attrs and all the content parts whose link mark has been removed
582
672
  // warning: This method will not work if the any of the content parts are not link marks