npm - @uniweb/semantic-parser - Versions diffs - 1.0.9 → 1.0.10 - Mend

@uniweb/semantic-parser 1.0.9 → 1.0.10

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (5)

package/AGENTS.md +42 -25
package/README.md +6 -6
package/package.json +4 -1
package/src/processors/groups.js +15 -11
package/src/processors/sequence.js +59 -11

package/AGENTS.md CHANGED Viewed

@@ -52,35 +52,26 @@ const result = parseContent(doc);
 // }
 ```
-### Content Group Structure
+### Content Output Structure
-Groups follow a specific structure defined in `processGroupContent()`:
+The parser returns a flat content structure:
 ```js
 {
-  header: {
-    pretitle: '',  // H3 before main title
-    title: '',     // Main heading (H1 or H2)
-    subtitle: ''   // Heading after main title
-  },
-  body: {
-    imgs: [],
-    icons: [],
-    videos: [],
-    paragraphs: [],
-    links: [],
-    lists: [],
-    buttons: [],
-    properties: [],
-    propertyBlocks: [],
-    cards: [],
-    headings: []
-  },
-  banner: null,    // Image with banner role or image before heading
-  metadata: {
-    level: null,   // Heading level that started this group
-    contentTypes: Set()
-  }
+  title: '',       // Main heading
+  pretitle: '',    // Heading before main title
+  subtitle: '',    // Heading after main title
+  paragraphs: [],
+  links: [],
+  imgs: [],
+  icons: [],
+  videos: [],
+  lists: [],
+  buttons: [],
+  data: {},        // Tagged code blocks (keyed by tag name)
+  cards: [],
+  headings: [],
+  items: [],       // Child content groups
 }
 ```
@@ -102,6 +93,32 @@ The sequence processor identifies several special element types by inspecting pa
 These are extracted into dedicated element types for easier downstream processing.
+### Tagged Code Blocks
+Code blocks with tags route parsed data to the `data` object:
+````markdown
+```json:nav-links
+[{ "label": "Home", "href": "/" }]
+```
+```yaml:config
+title: My Site
+theme: dark
+```
+````
+Results in:
+```js
+content.data['nav-links'] = [{ label: "Home", href: "/" }]
+content.data['config'] = { title: "My Site", theme: "dark" }
+```
+**Parsing rules:**
+- Tagged blocks with `json` language: parsed as JSON
+- Tagged blocks with `yaml`/`yml` language: parsed as YAML
+- Untagged blocks: not parsed (stay as raw text in sequence for display)
 ### List Processing
 Lists maintain hierarchy through nested structure. The `processListItems()` function in sequence.js handles nested lists, while `processListContent()` in groups.js applies full group content processing to each list item, allowing lists to contain rich content (images, paragraphs, nested lists, etc.).

package/README.md CHANGED Viewed

@@ -60,14 +60,14 @@ result.sequence = [
 ### Content Structure
-Main content fields are at the top level. The `items` array contains additional content groups (e.g., H3 sections), each with the same field structure:
+Main content fields are at the top level. The `items` array contains additional content groups (created when headings appear after content), each with the same field structure:
 ```js
 result = {
   // Main content fields
-  pretitle: "",             // H3 before main title
-  title: "Welcome",         // Main heading (H1)
-  subtitle: "",             // H2 after main title
+  pretitle: "",             // Heading before main title
+  title: "Welcome",         // Main heading
+  subtitle: "",             // Heading after main title
   paragraphs: ["Get started today."],
   imgs: [],
   videos: [],
@@ -78,7 +78,7 @@ result = {
   banner: null,             // Optional banner image
   // ... more content types
-  // Additional content groups (H3 sections)
+  // Additional content groups (from headings after content)
   items: [
     { title: "Feature 1", paragraphs: [...], links: [...] },
     { title: "Feature 2", paragraphs: [...], links: [...] }
@@ -113,7 +113,7 @@ const content = parseContent(doc);
 console.log("Title:", content.title);
 console.log("Description:", content.paragraphs);
-// Additional sections (H3 groups)
+// Additional content groups
 content.items.forEach(item => {
   console.log("Section:", item.title);
   console.log("Content:", item.paragraphs);

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@uniweb/semantic-parser",
-  "version": "1.0.9",
+  "version": "1.0.10",
   "description": "Semantic parser for ProseMirror/TipTap content structures",
   "type": "module",
   "main": "./src/index.js",
@@ -33,6 +33,9 @@
     "doc": "docs",
     "test": "tests"
   },
+  "dependencies": {
+    "yaml": "^2.8.2"
+  },
   "scripts": {
     "test": "NODE_OPTIONS=--experimental-vm-modules jest",
     "test-report": "NODE_OPTIONS=--experimental-vm-modules jest --json > test-results.json 2>&1",

package/src/processors/groups.js CHANGED Viewed

@@ -18,8 +18,7 @@ function flattenGroup(group) {
         lists: group.body.lists || [],
         videos: group.body.videos || [],
         buttons: group.body.buttons || [],
-        properties: group.body.properties || {},
-        propertyBlocks: group.body.propertyBlocks || [],
+        data: group.body.data || {},
         cards: group.body.cards || [],
         documents: group.body.documents || [],
         forms: group.body.forms || [],
@@ -50,8 +49,7 @@ function processGroups(sequence, options = {}) {
             lists: [],
             videos: [],
             buttons: [],
-            properties: {},
-            propertyBlocks: [],
+            data: {},
             cards: [],
             documents: [],
             forms: [],
@@ -92,8 +90,7 @@ function processGroups(sequence, options = {}) {
         lists: [],
         videos: [],
         buttons: [],
-        properties: {},
-        propertyBlocks: [],
+        data: {},
         cards: [],
         documents: [],
         forms: [],
@@ -239,8 +236,7 @@ function processGroupContent(elements) {
         links: [],
         lists: [],
         buttons: [],
-        properties: {},
-        propertyBlocks: [],
+        data: {},
         cards: [],
         documents: [],
         forms: [],
@@ -345,10 +341,18 @@ function processGroupContent(elements) {
                     body.quotes.push(quoteContent.body);
                     break;
+                case "dataBlock":
+                    // Pre-parsed structured data from content-reader
+                    body.data[element.tag] = element.data;
+                    break;
                 case "codeBlock":
-                    const codeData = element.text;
-                    body.properties = codeData; // Last one
-                    body.propertyBlocks.push(codeData); // All of them
+                    // Fallback: tagged code blocks where parsing failed at build time
+                    // Untagged blocks stay in sequence for display
+                    const tag = element.attrs?.tag;
+                    if (tag) {
+                        body.data[tag] = element.text;
+                    }
                     break;
                 case "form":

package/src/processors/sequence.js CHANGED Viewed

@@ -1,3 +1,52 @@
+import { parse as parseYaml } from "yaml";
+/**
+ * Get code block data - prefers pre-parsed attrs.data, falls back to parsing text
+ *
+ * Content can come from two sources:
+ * 1. Pre-parsed at build time: attrs.data contains parsed JS object
+ * 2. Legacy/runtime: text needs to be parsed based on language
+ *
+ * @param {string} text - Raw code block text
+ * @param {Object} attrs - Code block attributes (language, tag, data)
+ * @returns {*} Parsed data or raw text
+ */
+function getCodeBlockData(text, attrs) {
+    const { language, tag, data } = attrs || {};
+    // Only process tagged blocks
+    if (!tag) {
+        return text;
+    }
+    // Prefer pre-parsed data from build time (attrs.data)
+    if (data !== undefined) {
+        return data;
+    }
+    // Fallback: parse text at runtime (for backwards compatibility)
+    const lang = (language || "").toLowerCase();
+    if (lang === "json") {
+        try {
+            return JSON.parse(text);
+        } catch {
+            return text;
+        }
+    }
+    if (lang === "yaml" || lang === "yml") {
+        try {
+            return parseYaml(text);
+        } catch {
+            return text;
+        }
+    }
+    // Unknown language - return raw text
+    return text;
+}
 /**
  * Process a ProseMirror/TipTap document into a flat sequence
  * @param {Object} doc ProseMirror document
@@ -79,20 +128,19 @@ function createSequenceElement(node, options = {}) {
                 attrs,
             };
-        case "codeBlock":
-            let textContent = getTextContent(content, options);
-            let parsed = "";
-            //Try pasre json if possible
-            try {
-                parsed = JSON.parse(`${textContent}`);
-            } catch (err) {
-                parsed = textContent;
-            }
+        case "dataBlock":
+            // Pre-parsed structured data from content-reader
+            return {
+                type: "dataBlock",
+                data: attrs.data,
+                tag: attrs.tag,
+            };
+        case "codeBlock":
+            const codeText = getTextContent(content, options);
             return {
                 type: "codeBlock",
-                text: parsed,
+                text: getCodeBlockData(codeText, attrs),
                 attrs,
             };