npm - @uniweb/semantic-parser - Versions diffs - 1.0.12 → 1.0.14 - Mend

@uniweb/semantic-parser 1.0.12 → 1.0.14

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (3) hide show

package/README.md +20 -0
package/package.json +1 -1
package/src/processors/sequence.js +136 -26

package/README.md CHANGED Viewed

@@ -322,10 +322,30 @@ Inline formatting is preserved as HTML tags:
 // Input: Text with bold mark
 // Output: "Text with <strong>bold</strong>"
+// Input: Text with italic mark
+// Output: "Text with <em>emphasis</em>"
 // Input: Link mark
 // Output: "Click <a href=\"/docs\">here</a>"
+// Input: Span mark (bracketed spans)
+// Output: "This is <span class=\"highlight\">highlighted</span> text"
 ```
+### Span Marks
+Bracketed spans (`[text]{.class}`) are converted to `<span>` elements with their attributes:
+```js
+// Input mark
+{ type: "span", attrs: { class: "highlight", id: "note-1" } }
+// Output HTML
+'<span class="highlight" id="note-1">text</span>'
+```
+Spans can have classes, IDs, and custom attributes. They combine with other marks—a span with bold becomes `<strong><span class="...">text</span></strong>`.
 ## Documentation
 - **[Content Writing Guide](./docs/guide.md)**: Learn how to structure content for optimal parsing

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@uniweb/semantic-parser",
-  "version": "1.0.12",
+  "version": "1.0.14",
   "description": "Semantic parser for ProseMirror/TipTap content structures",
   "type": "module",
   "main": "./src/index.js",

package/src/processors/sequence.js CHANGED Viewed

@@ -62,24 +62,19 @@ function processSequence(doc, options = {}) {
 function processNode(node, sequence, options) {
     if (node.content && Array.isArray(node.content)) {
-        // node.content?.forEach((child) => processNode(child, sequence, options));
-        // return;
         node.content?.forEach((child) => {
-            const element = createSequenceElement(child, options);
-            if (element) {
-                sequence.push(element);
+            const result = createSequenceElement(child, options);
+            if (result) {
+                // Handle case where element returns multiple items (e.g., paragraph with only links)
+                if (Array.isArray(result)) {
+                    sequence.push(...result);
+                } else {
+                    sequence.push(result);
+                }
             }
         });
     }
-    // Create element based on node type
-    // const element = createSequenceElement(node, options);
-    // //Skip empty paragraph when create sequence
-    // if (element) {
-    //     sequence.push(element);
-    // }
 }
 function createSequenceElement(node, options = {}) {
@@ -95,6 +90,12 @@ function createSequenceElement(node, options = {}) {
         };
     }
+    // Check for paragraph containing only multiple links (no other text)
+    const multipleLinks = isOnlyLinks(node);
+    if (multipleLinks) {
+        return multipleLinks; // Returns array of link elements
+    }
     const styledLink = isStyledLink(node);
     if (styledLink) return styledLink;
@@ -281,6 +282,26 @@ function getTextContent(content, options = {}) {
                     styledText = `<span style="background-color: var(--highlight)">${styledText}</span>`;
                 }
+                // span (bracketed spans with class/id/attributes)
+                if (marks.some((mark) => mark.type === "span")) {
+                    const spanMark = marks.find((mark) => mark.type === "span");
+                    const attrs = spanMark?.attrs || {};
+                    const attrParts = [];
+                    if (attrs.class) attrParts.push(`class="${attrs.class}"`);
+                    if (attrs.id) attrParts.push(`id="${attrs.id}"`);
+                    // Add any other custom attributes (data-*, etc.)
+                    for (const [key, value] of Object.entries(attrs)) {
+                        if (key !== 'class' && key !== 'id') {
+                            attrParts.push(`${key}="${value}"`);
+                        }
+                    }
+                    const attrString = attrParts.length > 0 ? ` ${attrParts.join(' ')}` : '';
+                    styledText = `<span${attrString}>${styledText}</span>`;
+                }
                 // bold
                 if (marks.some((mark) => mark.type === "bold")) {
                     styledText = `<strong>${styledText}</strong>`;
@@ -434,7 +455,7 @@ function parseDocumentBlock(itemAttrs) {
 }
 function parseUniwebIcon(itemAttrs) {
-    let { svg, url, size, color, preserveColors } = itemAttrs;
+    let { svg, url, size, color, preserveColors, href, target } = itemAttrs || {};
     return {
         svg,
@@ -442,6 +463,8 @@ function parseUniwebIcon(itemAttrs) {
         size,
         color,
         preserveColors,
+        href,
+        target,
     };
 }
@@ -461,12 +484,13 @@ function parseImgBlock(itemAttrs) {
         alt = "",
         url,
         href = "",
+        target = "",
         theme,
         role,
         credit = "",
     } = itemAttrs;
-    let { contentType, viewType, contentId, identifier } = imgInfo;
+    let { contentType, viewType, contentId, identifier } = imgInfo || {};
     const sizes = {
         center: "basic",
@@ -493,6 +517,7 @@ function parseImgBlock(itemAttrs) {
         imgPos: direction === "left" || direction === "right" ? direction : "",
         size: sizes[direction] || "basic",
         href,
+        target,
         theme,
         role,
         credit,
@@ -507,6 +532,8 @@ function parseVideoBlock(itemAttrs) {
         info = {},
         coverImg = {},
         alt,
+        href = "",
+        target = "",
     } = itemAttrs;
     let video = makeAssetUrl({
@@ -520,6 +547,8 @@ function parseVideoBlock(itemAttrs) {
         direction,
         coverImg: makeAssetUrl(coverImg),
         alt,
+        href,
+        target,
     };
 }
@@ -539,35 +568,64 @@ function stripTags(htmlString) {
 }
 function isLink(item) {
-    //For fast check, we only assume link in paragraph or heading
+    // Detect paragraphs/headings that are semantically "just a link"
+    // (single link text, possibly with decorative icons)
+    //
+    // For single-link paragraphs, the icon-link association is unambiguous:
+    // - Icons before the link text → iconBefore
+    // - Icons after the link text → iconAfter
+    //
+    // This supports natural content authoring: insert icon, type link text, add href
     if (["paragraph", "heading"].includes(item.type)) {
-        let content = item?.content || [];
+        const originalContent = item?.content || [];
-        //filter out icons
-        content = content.filter((c) => {
+        // Filter out icons and whitespace to check for single link
+        const textContent = originalContent.filter((c) => {
             if (c.type === "UniwebIcon") {
                 return false;
             } else if (c.type === "text") {
                 return (c.text || "").trim() !== "";
             }
             return true;
         });
-        if (content.length === 1) {
-            let contentItem = content?.[0];
+        if (textContent.length === 1) {
+            let contentItem = textContent[0];
             let marks = contentItem?.marks || [];
             for (let l = 0; l < marks.length; l++) {
                 let mark = marks[l];
-                const markType = mark?.type;
+                if (mark?.type === "link") {
+                    // Find the position of the link text in the original content
+                    const linkIndex = originalContent.findIndex(
+                        (c) => c.type === "text" && c.text === contentItem.text
+                    );
+                    // Collect icons before and after the link text
+                    let iconBefore = null;
+                    let iconAfter = null;
+                    for (let i = 0; i < originalContent.length; i++) {
+                        if (originalContent[i].type === "UniwebIcon") {
+                            const iconAttrs = parseUniwebIcon(originalContent[i].attrs);
+                            if (i < linkIndex) {
+                                // Take the last icon before the link
+                                iconBefore = iconAttrs;
+                            } else if (i > linkIndex) {
+                                // Take the first icon after the link
+                                if (!iconAfter) iconAfter = iconAttrs;
+                            }
+                        }
+                    }
-                if (markType === "link") {
                     return {
                         href: mark?.attrs?.href,
                         label: contentItem?.text || "",
-                        children: processInlineElements(content),
+                        iconBefore,
+                        iconAfter,
+                        // Preserve all inline elements for advanced rendering
+                        children: processInlineElements(originalContent),
                     };
                 }
             }
@@ -577,6 +635,58 @@ function isLink(item) {
     return false;
 }
+/**
+ * Check if a paragraph contains ONLY links (multiple links, no other text)
+ * If so, return array of link data to be added to sequence separately.
+ *
+ * This handles the common pattern of writing links on consecutive lines:
+ * ```
+ * [Privacy Policy](/privacy)
+ * [Terms of Service](/terms)
+ * ```
+ * Markdown treats these as a single paragraph, but semantically they're separate links.
+ *
+ * Matching rules, as implemented below:
+ * - Only nodes whose type is "paragraph" are considered.
+ * - UniwebIcon nodes and empty/whitespace-only text nodes are ignored.
+ * - At least two nodes must remain, and every one of them must be a text
+ *   node carrying a mark of type "link"; otherwise the result is false.
+ *
+ * Each returned element has the shape
+ * { type: "link", attrs: { href, label } }, where href is read from the
+ * node's first link mark and label is the node's text ("" when absent).
+ *
+ * @param {Object} item - Sequence item (paragraph)
+ * @returns {Array|false} Array of link objects or false
+ */
+function isOnlyLinks(item) {
+    if (item.type !== "paragraph") return false; // headings and other node types never match
+    const content = item?.content || [];
+    if (!content.length) return false; // empty paragraph: nothing to extract
+    // Filter to get only significant content (no icons, no empty or whitespace-only text)
+    const textContent = content.filter((c) => {
+        if (c.type === "UniwebIcon") return false;
+        if (c.type === "text" && !(c.text || "").trim()) return false;
+        return true;
+    });
+    if (textContent.length < 2) return false; // Single link handled by isLink
+    // Check if ALL remaining content items are text nodes with link marks
+    const allLinks = textContent.every((c) => {
+        if (c.type !== "text") return false;
+        const hasLinkMark = c.marks?.some((m) => m.type === "link"); // undefined (falsy) when the node has no marks
+        return hasLinkMark;
+    });
+    if (!allLinks) return false;
+    // Extract links as simple {href, label} objects
+    // Icons were filtered out above and are not attached to any link here (original note: they go to body.icons separately; that handling is not visible in this function)
+    return textContent.map((c) => {
+        const linkMark = c.marks.find((m) => m.type === "link"); // every() above guarantees a link mark exists
+        return {
+            type: "link",
+            attrs: {
+                href: linkMark?.attrs?.href,
+                label: c.text || "",
+            },
+        };
+    });
+}
 // Checks whether the given item has multiple content parts that all share the same link attrs but differ in inline style (plain, em, strong, u).
 // If so, it returns the link attrs and all the content parts with their link marks removed.
 // Warning: this method will not work if any of the content parts does not have a link mark.