npm - @uniweb/semantic-parser - Versions diffs - 1.1.3 → 1.1.5 - Mend

@uniweb/semantic-parser 1.1.3 → 1.1.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (10)

package/AGENTS.md +5 -4
package/README.md +3 -3
package/docs/api.md +2 -2
package/docs/entity-consolidation.md +2 -2
package/docs/mapping-patterns.md +11 -11
package/package.json +1 -1
package/src/mappers/accessor.js +2 -2
package/src/mappers/extractors.js +17 -17
package/src/processors/groups.js +47 -12
package/src/processors/groups_backup.js +2 -2

package/AGENTS.md CHANGED Viewed

@@ -64,12 +64,13 @@ The parser returns a flat content structure:
   subtitle2: '',   // Third heading level
   paragraphs: [],
   links: [],       // All link-like entities (including buttons, documents)
-  imgs: [],
+  images: [],
   icons: [],
   videos: [],
   lists: [],
   quotes: [],
-  data: {},        // Structured data (tagged code blocks, forms, cards)
+  snippets: [],    // Fenced code — [{ language, code }]
+  data: {},        // Structured data (tagged data blocks, forms, cards)
   headings: [],    // Overflow headings after title/subtitle/subtitle2
   items: [],       // Child content groups (same structure recursively)
 }
@@ -127,9 +128,9 @@ Editor-specific nodes are mapped to standard entities:
 See `docs/entity-consolidation.md` for complete mapping documentation.
-### Tagged Code Blocks
+### Tagged Data Blocks
-Code blocks with tags route parsed data to the `data` object:
+Data blocks with tags route parsed data to the `data` object:
 ```markdown
 ```yaml:nav-links

package/README.md CHANGED Viewed

@@ -73,7 +73,7 @@ result = {
   // Body fields
   paragraphs: ["Get started today."],
   links: [],                // All links (including buttons, documents)
-  imgs: [],
+  images: [],
   videos: [],
   icons: [],
   lists: [],
@@ -164,7 +164,7 @@ const schema = {
     maxLength: 150
   },
   image: {
-    path: "imgs[0].url",
+    path: "images[0].url",
     type: "image",
     defaultValue: "/placeholder.jpg"
   }
@@ -214,7 +214,7 @@ const schema = {
   title: "title",
   subtitle: "subtitle",
   image: {
-    path: "imgs[0].url",
+    path: "images[0].url",
     defaultValue: "/placeholder.jpg"
   },
   actions: {

package/docs/api.md CHANGED Viewed

@@ -130,7 +130,7 @@ Content organized into semantic groups with identified main content and items. T
     // Body fields (flat)
     paragraphs: ["paragraph text", ...],
-    imgs: [
+    images: [
       { url: "...", caption: "...", alt: "..." }
     ],
     icons: ["<svg>...</svg>", ...],
@@ -160,7 +160,7 @@ Content organized into semantic groups with identified main content and items. T
   },
   items: [
     // Array of groups with same flat structure as main
-    // { title, pretitle, subtitle, paragraphs, imgs, ... }
+    // { title, pretitle, subtitle, paragraphs, images, ... }
   ],
   metadata: {
     dividerMode: false,     // Whether dividers were used for grouping

package/docs/entity-consolidation.md CHANGED Viewed

@@ -29,7 +29,7 @@ After consolidation, the parser outputs this flat structure:
     // Body fields
     paragraphs: [],    // Text blocks with inline HTML formatting
     links: [],         // All link-like entities (buttons, documents, nav links)
-    imgs: [],          // All images (with role distinguishing purpose)
+    images: [],          // All images (with role distinguishing purpose)
     videos: [],        // Video embeds
     icons: [],         // Standalone icons
     lists: [],         // Bullet/ordered lists (recursive structure)
@@ -94,7 +94,7 @@ All link-like content merges into the `links` array. The `role` attribute distin
 ### Images
-All image content uses the `imgs` array. The `role` attribute distinguishes purpose.
+All image content uses the `images` array. The `role` attribute distinguishes purpose.
 ```js
 {

package/docs/mapping-patterns.md CHANGED Viewed

@@ -37,7 +37,7 @@ const schema = {
     maxLength: 150
   },
   image: {
-    path: "groups.main.imgs[0].url",
+    path: "groups.main.images[0].url",
     type: "image",      // Normalizes image data
     defaultValue: "/placeholder.jpg",
     treatEmptyAsDefault: true
@@ -152,7 +152,7 @@ Normalizes image data structure.
 ```js
 {
   image: {
-    path: "groups.main.imgs[0]",
+    path: "groups.main.images[0]",
     type: "image",
     defaultValue: "/placeholder.jpg",
     defaultAlt: "Image"
@@ -234,7 +234,7 @@ const componentSchema = {
     maxLength: 200
   },
   image: {
-    path: "groups.main.imgs[0].url",
+    path: "groups.main.images[0].url",
     type: "image",
     defaultValue: "/placeholder.jpg"
   },
@@ -273,7 +273,7 @@ const heroData = mappers.extractors.hero(parsed);
 // Or use schema-based extraction
 const customData = mappers.extractBySchema(parsed, {
   title: "groups.main.title",
-  image: { path: "groups.main.imgs[0].url", defaultValue: "/placeholder.jpg" }
+  image: { path: "groups.main.images[0].url", defaultValue: "/placeholder.jpg" }
 });
 ```
@@ -354,10 +354,10 @@ const { accessor } = mappers;
 const title = accessor.getByPath(parsed, "groups.main.title");
 // Array index notation
-const firstImage = accessor.getByPath(parsed, "groups.main.imgs[0].url");
+const firstImage = accessor.getByPath(parsed, "groups.main.images[0].url");
 // With default value
-const image = accessor.getByPath(parsed, "groups.main.imgs[0].url", {
+const image = accessor.getByPath(parsed, "groups.main.images[0].url", {
   defaultValue: "/placeholder.jpg"
 });
@@ -383,7 +383,7 @@ const schema = {
   // Full config with options
   image: {
-    path: "groups.main.imgs[0].url",
+    path: "groups.main.images[0].url",
     defaultValue: "/placeholder.jpg"
   },
@@ -420,7 +420,7 @@ const titles = accessor.mapArray(parsed, "groups.items", "title");
 const cards = accessor.mapArray(parsed, "groups.items", {
   title: "title",
   text: { path: "paragraphs", transform: p => p.join(" ") },
-  image: { path: "imgs[0].url", defaultValue: "/default.jpg" }
+  image: { path: "images[0].url", defaultValue: "/default.jpg" }
 });
 // [
 //   { title: "...", text: "...", image: "..." },
@@ -439,8 +439,8 @@ if (accessor.hasPath(parsed, "groups.main.banner.url")) {
 // Get first existing path (flat structure)
 const image = accessor.getFirstExisting(parsed, [
   "groups.main.banner.url",
-  "groups.main.imgs[0].url",
-  "groups.items[0].imgs[0].url"
+  "groups.main.images[0].url",
+  "groups.items[0].images[0].url"
 ], "/fallback.jpg");
 ```
@@ -666,7 +666,7 @@ const componentSchema = {
       brand: "groups.main.pretitle",
       title: "groups.main.title",
       subtitle: "groups.main.subtitle",
-      image: { path: "groups.main.imgs[0].url", defaultValue: "/default.jpg" },
+      image: { path: "groups.main.images[0].url", defaultValue: "/default.jpg" },
       actions: {
         path: "groups.main.links",
         transform: links => links.map(l => ({ label: l.label, type: "primary" }))

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@uniweb/semantic-parser",
-  "version": "1.1.3",
+  "version": "1.1.5",
   "description": "Semantic parser for ProseMirror/TipTap content structures",
   "type": "module",
   "main": "./src/index.js",

package/src/mappers/accessor.js CHANGED Viewed

@@ -6,7 +6,7 @@ import { applyType, validateType } from './types.js';
 /**
  * Parse a path string into segments, handling array indices
- * @param {string} path - Path string (e.g., 'groups.main.body.imgs[0].url')
+ * @param {string} path - Path string (e.g., 'groups.main.body.images[0].url')
  * @returns {Array} Array of path segments
  */
 function parsePath(path) {
@@ -118,7 +118,7 @@ function getByPath(parsed, path, options = {}) {
  *     maxLength: 60
  *   },
  *   image: {
- *     path: 'groups.main.body.imgs[0].url',
+ *     path: 'groups.main.body.images[0].url',
  *     type: 'image',
  *     defaultValue: '/placeholder.jpg'
  *   },

package/src/mappers/extractors.js CHANGED Viewed

@@ -2,7 +2,7 @@
  * Pre-built extractors for common component patterns
  *
  * All extractors work with the flat content structure:
- * - Root level: title, pretitle, subtitle, paragraphs, links, imgs, items, etc.
+ * - Root level: title, pretitle, subtitle, paragraphs, links, images, items, etc.
  * - Items array: each item has flat structure (title, paragraphs, etc.)
  */
@@ -25,8 +25,8 @@ function hero(parsed) {
         subtitle: parsed?.subtitle || null,
         kicker: parsed?.pretitle || null,
         description: parsed?.paragraphs || [],
-        image: first(parsed?.imgs)?.url || null,
-        imageAlt: first(parsed?.imgs)?.alt || null,
+        image: first(parsed?.images)?.url || null,
+        imageAlt: first(parsed?.images)?.alt || null,
         banner: null, // Banner detection would need to be added separately
         cta: buttonLink || plainLink || null,
     };
@@ -56,8 +56,8 @@ function card(parsed, options = {}) {
             title: content.title || null,
             subtitle: content.subtitle || null,
             description: content.paragraphs || [],
-            image: first(content.imgs)?.url || null,
-            imageAlt: first(content.imgs)?.alt || null,
+            image: first(content.images)?.url || null,
+            imageAlt: first(content.images)?.alt || null,
             icon: first(content.icons) || null,
             link: plainLink || null,
             cta: buttonLink || plainLink || null,
@@ -91,7 +91,7 @@ function article(parsed) {
         date: null,   // Would need metadata support
         banner: null, // Banner detection would need to be added separately
         content: parsed?.paragraphs || [],
-        images: parsed?.imgs || [],
+        images: parsed?.images || [],
         videos: parsed?.videos || [],
         links: parsed?.links || [],
     };
@@ -166,7 +166,7 @@ function features(parsed) {
             subtitle: item.subtitle || null,
             description: item.paragraphs || [],
             icon: first(item.icons) || null,
-            image: first(item.imgs)?.url || null,
+            image: first(item.images)?.url || null,
             link: first(item.links) || null,
         }))
         .filter((feature) => feature.title);
@@ -192,8 +192,8 @@ function testimonial(parsed, options = {}) {
             author: content.title || null,
             role: content.subtitle || null,
             company: content.pretitle || null,
-            image: first(content.imgs)?.url || null,
-            imageAlt: first(content.imgs)?.alt || null,
+            image: first(content.images)?.url || null,
+            imageAlt: first(content.images)?.alt || null,
         };
     };
@@ -275,8 +275,8 @@ function team(parsed) {
             role: item.subtitle || null,
             department: item.pretitle || null,
             bio: item.paragraphs || [],
-            image: first(item.imgs)?.url || null,
-            imageAlt: first(item.imgs)?.alt || null,
+            image: first(item.images)?.url || null,
+            imageAlt: first(item.images)?.alt || null,
             links: item.links || [],
         }))
         .filter((member) => member.name);
@@ -296,14 +296,14 @@ function gallery(parsed, options = {}) {
     const images = [];
     if (source === "main" || source === "all") {
-        const mainImages = parsed?.imgs || [];
+        const mainImages = parsed?.images || [];
         images.push(...mainImages);
     }
     if (source === "items" || source === "all") {
         const items = parsed?.items || [];
         items.forEach((item) => {
-            const itemImages = item.imgs || [];
+            const itemImages = item.images || [];
             images.push(...itemImages);
         });
     }
@@ -339,12 +339,12 @@ function legacy(parsed) {
     const transformToNested = (content) => {
         if (!content) return null;
-        let imgs = content.imgs || [];
-        let banner = imgs.filter((item) => {
+        let images = content.images || [];
+        let banner = images.filter((item) => {
             return (item.role = "banner");
         })?.[0];
-        if (!banner) banner = imgs[0];
+        if (!banner) banner = images[0];
         // Reconstruct deprecated fields from new structure
         const links = content.links || [];
@@ -377,7 +377,7 @@ function legacy(parsed) {
             body: {
                 paragraphs: content.paragraphs || [],
                 headings: content.headings || [],
-                imgs,
+                images,
                 videos: content.videos || [],
                 lists: content.lists || [],
                 links: plainLinks,

package/src/processors/groups.js CHANGED Viewed

@@ -12,11 +12,12 @@ function flattenGroup(group) {
         subtitle2: group.header.subtitle2 || '',
         paragraphs: group.body.paragraphs || [],
         links: group.body.links || [],
-        imgs: group.body.imgs || [],
+        images: group.body.images || [],
         icons: group.body.icons || [],
         lists: group.body.lists || [],
         videos: group.body.videos || [],
         insets: group.body.insets || [],
+        snippets: group.body.snippets || [],
         data: group.body.data || {},
         quotes: group.body.quotes || [],
         headings: group.body.headings || [],
@@ -39,11 +40,12 @@ function processGroups(sequence, options = {}) {
             subtitle2: '',
             paragraphs: [],
             links: [],
-            imgs: [],
+            images: [],
             icons: [],
             lists: [],
             videos: [],
             insets: [],
+            snippets: [],
             data: {},
             quotes: [],
             headings: [],
@@ -76,7 +78,7 @@ function processGroups(sequence, options = {}) {
         subtitle2: '',
         paragraphs: [],
         links: [],
-        imgs: [],
+        images: [],
         icons: [],
         lists: [],
         videos: [],
@@ -173,6 +175,7 @@ function isBannerImage(sequence, i) {
 function readHeadingGroup(sequence, startIdx) {
     const elements = [sequence[startIdx]];
+    let hasGoneDeeper = false;
     // Iterate starting from the next element
     for (let i = startIdx + 1; i < sequence.length; i++) {
@@ -186,6 +189,7 @@ function readHeadingGroup(sequence, startIdx) {
         // Case 1: Strictly Deeper (Standard Subtitle/Deep Header)
         // e.g. H1 -> H2
         if (element.level > previousElement.level) {
+            hasGoneDeeper = true;
             elements.push(element);
             continue;
         }
@@ -198,7 +202,18 @@ function readHeadingGroup(sequence, startIdx) {
             continue;
         }
-        // Otherwise (Sibling or New Section), stop.
+        // Case 3: Same Level Continuation (multi-line heading)
+        // Only before going deeper — once a subtitle level is reached,
+        // same-level headings are new sections, not continuations.
+        // e.g. H1 -> H1 → merged into title array
+        // but H1 -> H2 -> H2 → second H2 starts a new group (items)
+        if (element.level === previousElement.level && !hasGoneDeeper) {
+            elements.push(element);
+            continue;
+        }
+        // Otherwise (New Section — went deeper then back up, or
+        // same-level after going deeper), stop.
         break;
     }
     return elements;
@@ -216,10 +231,11 @@ function processGroupContent(elements) {
     };
     const body = {
-        imgs: [],
+        images: [],
         icons: [],
         videos: [],
         insets: [],
+        snippets: [],
         paragraphs: [],
         links: [],
         lists: [],
@@ -240,6 +256,10 @@ function processGroupContent(elements) {
             metadata,
         };
+    // Track last assigned heading slot and level for same-level merging
+    let lastSlot = null;
+    let lastLevel = null;
     for (let i = 0; i < elements.length; i++) {
         // We should only set pretitle once
         if (isPreTitle(elements, i) && !header.pretitle) {
@@ -256,19 +276,29 @@ function processGroupContent(elements) {
             // We should set the group level to the highest one instead of the first one.
             metadata.level ??= element.level;
-            // h3 h2 h1 h1
-            // Assign to header fields
-            // h3 h2 h3 h4
-            if (!header.title) {
+            // Same level as last assigned → merge into same slot as array
+            if (lastLevel !== null && element.level === lastLevel && lastSlot) {
+                const current = header[lastSlot];
+                if (Array.isArray(current)) {
+                    current.push(element.text);
+                } else {
+                    header[lastSlot] = [current, element.text];
+                }
+            } else if (!header.title) {
                 header.title = element.text;
+                lastSlot = 'title';
             } else if (!header.subtitle) {
                 header.subtitle = element.text;
+                lastSlot = 'subtitle';
             } else if (!header.subtitle2) {
                 header.subtitle2 = element.text;
+                lastSlot = 'subtitle2';
             } else {
                 // After subtitle2, we're in body - collect heading
                 body.headings.push(element.text);
+                lastSlot = null;
             }
+            lastLevel = element.level;
         } else if (element.type === "list") {
             const listItems = element.children;
@@ -293,7 +323,7 @@ function processGroupContent(elements) {
                     if (element.attrs?.role === "icon") {
                         body.icons.push(element.attrs);
                     } else {
-                        body.imgs.push(preserveProps);
+                        body.images.push(preserveProps);
                     }
                     break;
@@ -339,11 +369,16 @@ function processGroupContent(elements) {
                     break;
                 case "codeBlock":
-                    // Fallback: tagged code blocks where parsing failed at build time
-                    // Untagged blocks stay in sequence for display
                     const tag = element.attrs?.tag;
                     if (tag) {
+                        // Tagged block where parsing failed at build time — store as data
                         body.data[tag] = element.text;
+                    } else {
+                        // Untagged code block — collect as a snippet
+                        body.snippets.push({
+                            language: element.attrs?.language || '',
+                            code: typeof element.text === 'string' ? element.text : '',
+                        });
                     }
                     break;

package/src/processors/groups_backup.js CHANGED Viewed

@@ -187,7 +187,7 @@ function processGroupContent(elements) {
     };
     let banner = null;
     const body = {
-        imgs: [],
+        images: [],
         icons: [],
         videos: [],
         paragraphs: [],
@@ -259,7 +259,7 @@ function processGroupContent(elements) {
                     break;
                 case "image":
-                    body.imgs.push({
+                    body.images.push({
                         url: element.src,
                         caption: element.caption,
                         alt: element.alt,