npm - @uniweb/semantic-parser - Versions diffs - 1.0.8 → 1.0.9 - Mend

@uniweb/semantic-parser 1.0.8 → 1.0.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (9)

package/README.md +52 -104
package/docs/api.md +38 -40
package/docs/mapping-patterns.md +47 -47
package/docs/text-component-reference.md +3 -3
package/package.json +1 -1
package/src/index.js +5 -7
package/src/mappers/extractors.js +113 -120
package/src/processors/groups.js +96 -25
package/src/processors/byType.js +0 -130

package/README.md CHANGED Viewed

@@ -4,11 +4,10 @@ A semantic parser for ProseMirror/TipTap content structures that helps bridge th
 ## What it Does
-The parser transforms rich text editor content (ProseMirror/TipTap) into structured, semantic groups that web components can easily consume. It provides three complementary views of your content:
+The parser transforms rich text editor content (ProseMirror/TipTap) into structured, semantic groups that web components can easily consume. It provides two complementary views of your content:
-1. **Sequence**: A flat, ordered list of all content elements
-2. **Groups**: Content organized into semantic sections with identified main content
-3. **ByType**: Elements categorized by type for easy filtering and queries
+1. **Sequence**: An ordered list of all content elements (for rendering in document order)
+2. **Groups**: Content organized into semantic sections (main content + items)
 ## Installation
@@ -41,16 +40,16 @@ const doc = {
 const result = parseContent(doc);
 // Access different views
-console.log(result.sequence);  // Flat array of elements
-console.log(result.groups);    // Semantic groups with main/items
-console.log(result.byType);    // Elements organized by type
+console.log(result.sequence);  // Ordered array of elements
+console.log(result.title);     // Main content fields at top level
+console.log(result.items);     // Additional content groups
 ```
 ## Output Structure
 ### Sequence View
-A flat array of semantic elements preserving document order:
+An ordered array of semantic elements preserving document order:
 ```js
 result.sequence = [
@@ -59,72 +58,37 @@ result.sequence = [
 ]
 ```
-### Groups View
+### Content Structure
-Content organized into semantic groups:
+Main content fields are at the top level. The `items` array contains additional content groups (e.g., H3 sections), each with the same field structure:
 ```js
-result.groups = {
-  main: {
-    header: {
-      pretitle: "",           // H3 before main title
-      title: "Welcome",       // Main heading
-      subtitle: ""            // Heading after main title
-    },
-    body: {
-      paragraphs: ["Get started today."],
-      imgs: [],
-      videos: [],
-      links: [],
-      lists: [],
-      // ... more content types
-    },
-    banner: null,             // Optional banner image
-    metadata: { level: 1 }
-  },
-  items: [],                  // Additional content groups
-  metadata: {
-    dividerMode: false,       // Using dividers vs headings
-    groups: 0
-  }
-}
-```
-### ByType View
+result = {
+  // Main content fields
+  pretitle: "",             // H3 before main title
+  title: "Welcome",         // Main heading (H1)
+  subtitle: "",             // H2 after main title
+  paragraphs: ["Get started today."],
+  imgs: [],
+  videos: [],
+  links: [],
+  lists: [],
+  icons: [],
+  buttons: [],
+  banner: null,             // Optional banner image
+  // ... more content types
+  // Additional content groups (H3 sections)
+  items: [
+    { title: "Feature 1", paragraphs: [...], links: [...] },
+    { title: "Feature 2", paragraphs: [...], links: [...] }
+  ],
-Elements organized by type with context:
+  // Ordered sequence for document-order rendering
+  sequence: [...],
-```js
-result.byType = {
-  headings: [
-    {
-      type: "heading",
-      level: 1,
-      content: "Welcome",
-      context: {
-        position: 0,
-        previousElement: null,
-        nextElement: { type: "paragraph", ... },
-        nearestHeading: null
-      }
-    }
-  ],
-  paragraphs: [ /* ... */ ],
-  images: {
-    background: [],
-    content: [],
-    gallery: [],
-    icon: []
-  },
-  lists: [],
-  metadata: {
-    totalElements: 2,
-    dominantType: "paragraph",
-    hasMedia: false
-  },
-  // Helper methods
-  getHeadingsByLevel(level),
-  getElementsByHeadingContext(filter)
+  // Original document
+  raw: { type: "doc", content: [...] }
 }
 ```
@@ -133,45 +97,29 @@ result.byType = {
 ### Extracting Main Content
 ```js
-const { groups } = parseContent(doc);
+const content = parseContent(doc);
-const title = groups.main.header.title;
-const description = groups.main.body.paragraphs.join(" ");
-const image = groups.main.banner?.url;
+const title = content.title;
+const description = content.paragraphs.join(" ");
+const image = content.banner?.url;
 ```
 ### Processing Content Sections
 ```js
-const { groups } = parseContent(doc);
+const content = parseContent(doc);
 // Main content
-console.log("Main:", groups.main.header.title);
+console.log("Title:", content.title);
+console.log("Description:", content.paragraphs);
-// Additional sections
-groups.items.forEach(item => {
-  console.log("Section:", item.header.title);
-  console.log("Content:", item.body.paragraphs);
+// Additional sections (H3 groups)
+content.items.forEach(item => {
+  console.log("Section:", item.title);
+  console.log("Content:", item.paragraphs);
 });
 ```
-### Finding Specific Elements
-```js
-const { byType } = parseContent(doc);
-// Get all H2 headings
-const subheadings = byType.getHeadingsByLevel(2);
-// Get all background images
-const backgrounds = byType.images.background;
-// Get content under specific headings
-const features = byType.getElementsByHeadingContext(
-  h => h.content.includes("Features")
-);
-```
 ### Sequential Processing
 ```js
@@ -203,17 +151,17 @@ Automatically transform content based on field types with context-aware behavior
 ```js
 const schema = {
   title: {
-    path: "groups.main.header.title",
+    path: "title",
     type: "plaintext",  // Auto-strips <strong>, <em>, etc.
     maxLength: 60       // Auto-truncates intelligently
   },
   excerpt: {
-    path: "groups.main.body.paragraphs",
+    path: "paragraphs",
     type: "excerpt",    // Auto-creates excerpt from paragraphs
     maxLength: 150
   },
   image: {
-    path: "groups.main.body.imgs[0].url",
+    path: "imgs[0].url",
     type: "image",
     defaultValue: "/placeholder.jpg"
   }
@@ -259,15 +207,15 @@ Define custom mappings using schemas:
 ```js
 const schema = {
-  brand: "groups.main.header.pretitle",
-  title: "groups.main.header.title",
-  subtitle: "groups.main.header.subtitle",
+  brand: "pretitle",
+  title: "title",
+  subtitle: "subtitle",
   image: {
-    path: "groups.main.body.imgs[0].url",
+    path: "imgs[0].url",
     defaultValue: "/placeholder.jpg"
   },
   actions: {
-    path: "groups.main.body.links",
+    path: "links",
     transform: links => links.map(l => ({ label: l.label, type: "primary" }))
   }
 };

package/docs/api.md CHANGED Viewed

@@ -118,51 +118,49 @@ A flat array of semantic elements extracted from the document tree.
 ### `groups`
-Content organized into semantic groups with identified main content and items.
+Content organized into semantic groups with identified main content and items. The structure is flat - header and body fields are merged at the top level.
 ```js
 {
   main: {
-    header: {
-      pretitle: "PRETITLE TEXT",  // H3 before main title
-      title: "Main Title",         // First heading in group
-      subtitle: "Subtitle"         // Second heading in group
-    },
-    body: {
-      paragraphs: ["paragraph text", ...],
-      imgs: [
-        { url: "...", caption: "...", alt: "..." }
-      ],
-      icons: ["<svg>...</svg>", ...],
-      videos: [
-        { src: "...", caption: "...", alt: "..." }
-      ],
-      links: [
-        { href: "...", label: "..." }
-      ],
-      lists: [
-        [/* processed list items */]
-      ],
-      buttons: [
-        { content: "...", attrs: {...} }
-      ],
-      properties: [],       // Code block content
-      propertyBlocks: [],   // Array of code blocks
-      cards: [],            // Not yet implemented
-      headings: []          // Used in list items
-    },
+    // Header fields (flat)
+    pretitle: "PRETITLE TEXT",    // H3 before main title
+    title: "Main Title",           // First heading in group
+    subtitle: "Subtitle",          // Second heading in group
+    // Body fields (flat)
+    paragraphs: ["paragraph text", ...],
+    imgs: [
+      { url: "...", caption: "...", alt: "..." }
+    ],
+    icons: ["<svg>...</svg>", ...],
+    videos: [
+      { src: "...", caption: "...", alt: "..." }
+    ],
+    links: [
+      { href: "...", label: "..." }
+    ],
+    lists: [
+      [/* processed list items */]
+    ],
+    buttons: [
+      { content: "...", attrs: {...} }
+    ],
+    properties: [],       // Code block content
+    propertyBlocks: [],   // Array of code blocks
+    cards: [],            // Not yet implemented
+    headings: [],         // Used in list items
+    // Banner (flat)
     banner: {
       url: "path/to/banner.jpg",
       caption: "Banner caption",
       alt: "Banner alt text"
-    } | null,
-    metadata: {
-      level: 1,             // Heading level that started this group
-      contentTypes: {}      // Set of content types in group
-    }
+    } | null
   },
   items: [
-    // Array of groups with same structure as main
+    // Array of groups with same flat structure as main
+    // { title, pretitle, subtitle, paragraphs, imgs, ... }
   ],
   metadata: {
     dividerMode: false,     // Whether dividers were used for grouping
@@ -268,14 +266,14 @@ const result = parseContent(doc);
 ```js
 const { groups } = parseContent(doc);
-// Access main content
-console.log(groups.main.header.title);
-console.log(groups.main.body.paragraphs);
+// Access main content (flat structure)
+console.log(groups.main.title);
+console.log(groups.main.paragraphs);
 // Iterate through content items
 groups.items.forEach(item => {
-  console.log(item.header.title);
-  console.log(item.body.paragraphs);
+  console.log(item.title);
+  console.log(item.paragraphs);
 });
 ```

package/docs/mapping-patterns.md CHANGED Viewed

@@ -27,17 +27,17 @@ Gracefully handles content issues with silent, automatic cleanup:
 ```js
 const schema = {
   title: {
-    path: "groups.main.header.title",
+    path: "groups.main.title",
     type: "plaintext",  // Auto-strips HTML markup
     maxLength: 60       // Auto-truncates with smart boundaries
   },
   description: {
-    path: "groups.main.body.paragraphs",
+    path: "groups.main.paragraphs",
     type: "excerpt",    // Auto-creates excerpt from paragraphs
     maxLength: 150
   },
   image: {
-    path: "groups.main.body.imgs[0].url",
+    path: "groups.main.imgs[0].url",
     type: "image",      // Normalizes image data
     defaultValue: "/placeholder.jpg",
     treatEmptyAsDefault: true
@@ -74,7 +74,7 @@ Strips all HTML markup, returning clean text. Perfect for titles, labels, and an
 ```js
 {
   title: {
-    path: "groups.main.header.title",
+    path: "groups.main.title",
     type: "plaintext",
     maxLength: 60,              // Auto-truncate
     boundary: "word",            // or "sentence", "character"
@@ -94,7 +94,7 @@ Preserves safe HTML while removing dangerous tags (script, iframe, etc.).
 ```js
 {
   description: {
-    path: "groups.main.body.paragraphs[0]",
+    path: "groups.main.paragraphs[0]",
     type: "richtext",
     allowedTags: ["strong", "em", "a", "br"],  // Customize allowed tags
     stripTags: ["script", "style"]              // Additional tags to remove
@@ -112,7 +112,7 @@ Auto-generates excerpt from content, stripping markup and truncating intelligent
 ```js
 {
   excerpt: {
-    path: "groups.main.body.paragraphs",
+    path: "groups.main.paragraphs",
     type: "excerpt",
     maxLength: 150,
     boundary: "word",             // or "sentence"
@@ -131,7 +131,7 @@ Parses and optionally formats numbers.
 ```js
 {
   price: {
-    path: "groups.main.header.title",
+    path: "groups.main.title",
     type: "number",
     format: {
       decimals: 2,
@@ -152,7 +152,7 @@ Normalizes image data structure.
 ```js
 {
   image: {
-    path: "groups.main.body.imgs[0]",
+    path: "groups.main.imgs[0]",
     type: "image",
     defaultValue: "/placeholder.jpg",
     defaultAlt: "Image"
@@ -170,7 +170,7 @@ Normalizes link data structure.
 ```js
 {
   cta: {
-    path: "groups.main.body.links[0]",
+    path: "groups.main.links[0]",
     type: "link"
   }
 }
@@ -212,34 +212,34 @@ const hints = mappers.validateSchema(parsed, schema, { mode: 'visual-editor' });
 // Component declares its content requirements
 const componentSchema = {
   brand: {
-    path: "groups.main.header.pretitle",
+    path: "groups.main.pretitle",
     type: "plaintext",
     maxLength: 20,
     transform: (text) => text.toUpperCase()
   },
   title: {
-    path: "groups.main.header.title",
+    path: "groups.main.title",
     type: "plaintext",
     maxLength: 60,
     required: true
   },
   subtitle: {
-    path: "groups.main.header.subtitle",
+    path: "groups.main.subtitle",
     type: "plaintext",
     maxLength: 100
   },
   description: {
-    path: "groups.main.body.paragraphs",
+    path: "groups.main.paragraphs",
     type: "excerpt",
     maxLength: 200
   },
   image: {
-    path: "groups.main.body.imgs[0].url",
+    path: "groups.main.imgs[0].url",
     type: "image",
     defaultValue: "/placeholder.jpg"
   },
   cta: {
-    path: "groups.main.body.links[0]",
+    path: "groups.main.links[0]",
     type: "link"
   }
 };
@@ -272,8 +272,8 @@ const heroData = mappers.extractors.hero(parsed);
 // Or use schema-based extraction
 const customData = mappers.extractBySchema(parsed, {
-  title: "groups.main.header.title",
-  image: { path: "groups.main.body.imgs[0].url", defaultValue: "/placeholder.jpg" }
+  title: "groups.main.title",
+  image: { path: "groups.main.imgs[0].url", defaultValue: "/placeholder.jpg" }
 });
 ```
@@ -291,7 +291,7 @@ const image = helpers.first(images, "/default.jpg");
 const lastParagraph = helpers.last(paragraphs);
 // Transform array
-const titles = helpers.transformArray(items, item => item.header.title);
+const titles = helpers.transformArray(items, item => item.title);
 // Filter and transform
 const h2s = helpers.filterArray(headings, h => h.level === 2, h => h.content);
@@ -308,7 +308,7 @@ const cleanArray = helpers.compact([null, "text", "", undefined, "more"]);
 ```js
 // Get nested value safely
-const title = helpers.get(parsed, "groups.main.header.title", "Untitled");
+const title = helpers.get(parsed, "groups.main.title", "Untitled");
 // Pick specific properties
 const metadata = helpers.pick(parsed.groups.main, ["header", "banner"]);
@@ -337,7 +337,7 @@ if (!validation.valid) {
 ```js
 // Wrap extraction in try-catch
 const safeExtractor = helpers.safe((parsed) => {
-  return parsed.groups.main.header.title.toUpperCase();
+  return parsed.groups.main.title.toUpperCase();
 }, "DEFAULT");
 const title = safeExtractor(parsed); // Won't throw if path is invalid
@@ -350,24 +350,24 @@ const title = safeExtractor(parsed); // Won't throw if path is invalid
 ```js
 const { accessor } = mappers;
-// Simple path
-const title = accessor.getByPath(parsed, "groups.main.header.title");
+// Simple path (flat structure)
+const title = accessor.getByPath(parsed, "groups.main.title");
 // Array index notation
-const firstImage = accessor.getByPath(parsed, "groups.main.body.imgs[0].url");
+const firstImage = accessor.getByPath(parsed, "groups.main.imgs[0].url");
 // With default value
-const image = accessor.getByPath(parsed, "groups.main.body.imgs[0].url", {
+const image = accessor.getByPath(parsed, "groups.main.imgs[0].url", {
   defaultValue: "/placeholder.jpg"
 });
 // With transformation
-const description = accessor.getByPath(parsed, "groups.main.body.paragraphs", {
+const description = accessor.getByPath(parsed, "groups.main.paragraphs", {
   transform: (paragraphs) => paragraphs.join(" ")
 });
 // Required field (throws if missing)
-const title = accessor.getByPath(parsed, "groups.main.header.title", {
+const title = accessor.getByPath(parsed, "groups.main.title", {
   required: true
 });
 ```
@@ -378,22 +378,22 @@ Extract multiple fields at once using a schema:
 ```js
 const schema = {
-  // Shorthand: just the path
-  title: "groups.main.header.title",
+  // Shorthand: just the path (flat structure)
+  title: "groups.main.title",
   // Full config with options
   image: {
-    path: "groups.main.body.imgs[0].url",
+    path: "groups.main.imgs[0].url",
     defaultValue: "/placeholder.jpg"
   },
   description: {
-    path: "groups.main.body.paragraphs",
+    path: "groups.main.paragraphs",
     transform: (p) => p.join(" ")
   },
   cta: {
-    path: "groups.main.body.links[0]",
+    path: "groups.main.links[0]",
     required: false
   }
 };
@@ -412,15 +412,15 @@ const data = accessor.extractBySchema(parsed, schema);
 Extract data from array of items:
 ```js
-// Simple: extract single field from each item
-const titles = accessor.mapArray(parsed, "groups.items", "header.title");
+// Simple: extract single field from each item (flat structure)
+const titles = accessor.mapArray(parsed, "groups.items", "title");
 // ["Item 1", "Item 2", "Item 3"]
 // Complex: extract multiple fields from each item
 const cards = accessor.mapArray(parsed, "groups.items", {
-  title: "header.title",
-  text: { path: "body.paragraphs", transform: p => p.join(" ") },
-  image: { path: "body.imgs[0].url", defaultValue: "/default.jpg" }
+  title: "title",
+  text: { path: "paragraphs", transform: p => p.join(" ") },
+  image: { path: "imgs[0].url", defaultValue: "/default.jpg" }
 });
 // [
 //   { title: "...", text: "...", image: "..." },
@@ -436,11 +436,11 @@ if (accessor.hasPath(parsed, "groups.main.banner.url")) {
   // Banner exists
 }
-// Get first existing path
+// Get first existing path (flat structure)
 const image = accessor.getFirstExisting(parsed, [
   "groups.main.banner.url",
-  "groups.main.body.imgs[0].url",
-  "groups.items[0].body.imgs[0].url"
+  "groups.main.imgs[0].url",
+  "groups.items[0].imgs[0].url"
 ], "/fallback.jpg");
 ```
@@ -640,8 +640,8 @@ const enhancedData = {
   relatedPosts: helpers.transformArray(
     accessor.getByPath(parsed, "groups.items", { defaultValue: [] }),
     item => ({
-      title: item.header.title,
-      link: helpers.first(item.body.links)
+      title: item.title,
+      link: helpers.first(item.links)
     })
   ),
@@ -662,13 +662,13 @@ const componentSchema = {
   content: {
     type: "hero", // Use pre-built extractor
     // OR
-    mapping: {    // Use custom mapping
-      brand: "groups.main.header.pretitle",
-      title: "groups.main.header.title",
-      subtitle: "groups.main.header.subtitle",
-      image: { path: "groups.main.body.imgs[0].url", defaultValue: "/default.jpg" },
+    mapping: {    // Use custom mapping (flat paths)
+      brand: "groups.main.pretitle",
+      title: "groups.main.title",
+      subtitle: "groups.main.subtitle",
+      image: { path: "groups.main.imgs[0].url", defaultValue: "/default.jpg" },
       actions: {
-        path: "groups.main.body.links",
+        path: "groups.main.links",
         transform: links => links.map(l => ({ label: l.label, type: "primary" }))
       }
     }

package/docs/text-component-reference.md CHANGED Viewed

@@ -274,9 +274,9 @@ function Card({ data }) {
 import { getByPath, extractBySchema } from '@uniweb/semantic-parser/mappers/accessor';
 const schema = {
-  title: { path: 'groups.main.header.title' },
-  subtitle: { path: 'groups.main.header.subtitle' },
-  content: { path: 'groups.main.body.paragraphs' }
+  title: { path: 'groups.main.title' },
+  subtitle: { path: 'groups.main.subtitle' },
+  content: { path: 'groups.main.paragraphs' }
 };
 const data = extractBySchema(parsed, schema);

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@uniweb/semantic-parser",
-  "version": "1.0.8",
+  "version": "1.0.9",
   "description": "Semantic parser for ProseMirror/TipTap content structures",
   "type": "module",
   "main": "./src/index.js",

package/src/index.js CHANGED Viewed

@@ -1,6 +1,5 @@
 import { processSequence } from "./processors/sequence.js";
 import { processGroups } from "./processors/groups.js";
-import { processByType } from "./processors/byType.js";
 import * as mappers from "./mappers/index.js";
 /**
@@ -8,7 +7,7 @@ import * as mappers from "./mappers/index.js";
  * @param {Object} doc - ProseMirror document
  * @param {Object} options - Parsing options
  * @param {boolean} options.parseCodeAsJson - Parse code blocks as JSON. Default: false
- * @returns {Object} Parsed content structure
+ * @returns {Object} Flat content structure with sequence for ordered access
  */
 function parseContent(doc, options = {}) {
     // Default options
@@ -17,18 +16,17 @@ function parseContent(doc, options = {}) {
         ...options,
     };
-    // Process content in different ways
+    // Process sequence (ordered elements)
     const sequence = processSequence(doc, opts);
+    // Process groups (semantic structure) - returns flat object
     const groups = processGroups(sequence, opts);
-    const byType = processByType(sequence);
+    // Return flat structure with sequence at top level
     return {
         raw: doc,
         sequence,
-        groups,
-        byType,
+        ...groups,  // Spread flat content: title, paragraphs, items, etc.
     };
 }

package/src/mappers/extractors.js CHANGED Viewed

@@ -1,5 +1,9 @@
 /**
  * Pre-built extractors for common component patterns
+ *
+ * All extractors work with the flat content structure:
+ * - Root level: title, pretitle, subtitle, paragraphs, links, imgs, items, etc.
+ * - Items array: each item has flat structure (title, paragraphs, etc.)
  */
 import { first, joinParagraphs } from "./helpers.js";
@@ -12,18 +16,16 @@ import { first, joinParagraphs } from "./helpers.js";
  * @returns {Object} Hero component data
  */
 function hero(parsed) {
-    const main = parsed.groups?.main;
     return {
-        title: main?.header?.title || null,
-        subtitle: main?.header?.subtitle || null,
-        kicker: main?.header?.pretitle || null,
-        description: main?.body?.paragraphs || [],
-        image: first(main?.body?.imgs)?.url || null,
-        imageAlt: first(main?.body?.imgs)?.alt || null,
-        banner: main?.banner?.url || null,
-        cta: first(main?.body?.links) || null,
-        button: first(main?.body?.buttons) || null,
+        title: parsed?.title || null,
+        subtitle: parsed?.subtitle || null,
+        kicker: parsed?.pretitle || null,
+        description: parsed?.paragraphs || [],
+        image: first(parsed?.imgs)?.url || null,
+        imageAlt: first(parsed?.imgs)?.alt || null,
+        banner: null, // Banner detection would need to be added separately
+        cta: first(parsed?.links) || null,
+        button: first(parsed?.buttons) || null,
     };
 }
@@ -40,30 +42,30 @@ function hero(parsed) {
 function card(parsed, options = {}) {
     const { useItems = false, itemIndex } = options;
-    const extractCard = (group) => {
-        if (!group) return null;
+    const extractCard = (content) => {
+        if (!content) return null;
         return {
-            title: group.header?.title || null,
-            subtitle: group.header?.subtitle || null,
-            description: group.body?.paragraphs || [],
-            image: first(group.body?.imgs)?.url || null,
-            imageAlt: first(group.body?.imgs)?.alt || null,
-            icon: first(group.body?.icons) || null,
-            link: first(group.body?.links) || null,
-            button: first(group.body?.buttons) || null,
+            title: content.title || null,
+            subtitle: content.subtitle || null,
+            description: content.paragraphs || [],
+            image: first(content.imgs)?.url || null,
+            imageAlt: first(content.imgs)?.alt || null,
+            icon: first(content.icons) || null,
+            link: first(content.links) || null,
+            button: first(content.buttons) || null,
         };
     };
     if (useItems) {
-        const items = parsed.groups?.items || [];
+        const items = parsed?.items || [];
         if (itemIndex !== undefined) {
             return extractCard(items[itemIndex]);
         }
         return items.map(extractCard).filter(Boolean);
     }
-    return extractCard(parsed.groups?.main);
+    return extractCard(parsed);
 }
 /**
@@ -74,19 +76,17 @@ function card(parsed, options = {}) {
  * @returns {Object} Article data
  */
 function article(parsed) {
-    const main = parsed.groups?.main;
     return {
-        title: main?.header?.title || null,
-        subtitle: main?.header?.subtitle || null,
-        kicker: main?.header?.pretitle || null,
-        author: main?.metadata?.author || null,
-        date: main?.metadata?.date || null,
-        banner: main?.banner?.url || null,
-        content: main?.body?.paragraphs || [],
-        images: main?.body?.imgs || [],
-        videos: main?.body?.videos || [],
-        links: main?.body?.links || [],
+        title: parsed?.title || null,
+        subtitle: parsed?.subtitle || null,
+        kicker: parsed?.pretitle || null,
+        author: null, // Would need metadata support
+        date: null,   // Would need metadata support
+        banner: null, // Banner detection would need to be added separately
+        content: parsed?.paragraphs || [],
+        images: parsed?.imgs || [],
+        videos: parsed?.videos || [],
+        links: parsed?.links || [],
     };
 }
@@ -98,14 +98,13 @@ function article(parsed) {
  * @returns {Array} Array of stat objects
  */
 function stats(parsed) {
-    const items = parsed.groups?.items || [];
+    const items = parsed?.items || [];
     return items
         .map((item) => ({
-            value: item.header?.title || null,
-            label:
-                item.header?.subtitle || first(item.body?.paragraphs) || null,
-            description: item.body?.paragraphs || [],
+            value: item.title || null,
+            label: item.subtitle || first(item.paragraphs) || null,
+            description: item.paragraphs || [],
         }))
         .filter((stat) => stat.value);
 }
@@ -118,17 +117,17 @@ function stats(parsed) {
  * @returns {Array} Navigation items
  */
 function navigation(parsed) {
-    const items = parsed.groups?.items || [];
+    const items = parsed?.items || [];
     return items
         .map((item) => {
             const navItem = {
-                label: item.header?.title || null,
-                href: first(item.body?.links)?.href || null,
+                label: item.title || null,
+                href: first(item.links)?.href || null,
             };
             // Extract children from nested lists
-            const firstList = first(item.body?.lists);
+            const firstList = first(item.lists);
             if (firstList && firstList.length > 0) {
                 navItem.children = firstList
                     .map((listItem) => ({
@@ -152,16 +151,16 @@ function navigation(parsed) {
  * @returns {Array} Feature items
  */
 function features(parsed) {
-    const items = parsed.groups?.items || [];
+    const items = parsed?.items || [];
     return items
         .map((item) => ({
-            title: item.header?.title || null,
-            subtitle: item.header?.subtitle || null,
-            description: item.body?.paragraphs || [],
-            icon: first(item.body?.icons) || null,
-            image: first(item.body?.imgs)?.url || null,
-            link: first(item.body?.links) || null,
+            title: item.title || null,
+            subtitle: item.subtitle || null,
+            description: item.paragraphs || [],
+            icon: first(item.icons) || null,
+            image: first(item.imgs)?.url || null,
+            link: first(item.links) || null,
         }))
         .filter((feature) => feature.title);
 }
@@ -178,25 +177,25 @@ function features(parsed) {
 function testimonial(parsed, options = {}) {
     const { useItems = false } = options;
-    const extractTestimonial = (group) => {
-        if (!group) return null;
+    const extractTestimonial = (content) => {
+        if (!content) return null;
         return {
-            quote: group.body?.paragraphs || [],
-            author: group.header?.title || null,
-            role: group.header?.subtitle || null,
-            company: group.header?.pretitle || null,
-            image: first(group.body?.imgs)?.url || null,
-            imageAlt: first(group.body?.imgs)?.alt || null,
+            quote: content.paragraphs || [],
+            author: content.title || null,
+            role: content.subtitle || null,
+            company: content.pretitle || null,
+            image: first(content.imgs)?.url || null,
+            imageAlt: first(content.imgs)?.alt || null,
         };
     };
     if (useItems) {
-        const items = parsed.groups?.items || [];
+        const items = parsed?.items || [];
         return items.map(extractTestimonial).filter(Boolean);
     }
-    return extractTestimonial(parsed.groups?.main);
+    return extractTestimonial(parsed);
 }
 /**
@@ -207,13 +206,13 @@ function testimonial(parsed, options = {}) {
  * @returns {Array} FAQ items
  */
 function faq(parsed) {
-    const items = parsed.groups?.items || [];
+    const items = parsed?.items || [];
     return items
         .map((item) => ({
-            question: item.header?.title || null,
-            answer: item.body?.paragraphs || [],
-            links: item.body?.links || [],
+            question: item.title || null,
+            answer: item.paragraphs || [],
+            links: item.links || [],
         }))
         .filter((item) => item.question);
 }
@@ -226,16 +225,16 @@ function faq(parsed) {
  * @returns {Array} Pricing tiers
  */
 function pricing(parsed) {
-    const items = parsed.groups?.items || [];
+    const items = parsed?.items || [];
     return items
         .map((item) => {
-            const firstList = first(item.body?.lists);
+            const firstList = first(item.lists);
             return {
-                name: item.header?.title || null,
-                price: item.header?.subtitle || null,
-                description: first(item.body?.paragraphs) || null,
+                name: item.title || null,
+                price: item.subtitle || null,
+                description: first(item.paragraphs) || null,
                 features: firstList
                     ? firstList
                           .map((listItem) =>
@@ -243,13 +242,9 @@ function pricing(parsed) {
                           )
                           .filter(Boolean)
                     : [],
-                cta:
-                    first(item.body?.links) ||
-                    first(item.body?.buttons) ||
-                    null,
+                cta: first(item.links) || first(item.buttons) || null,
                 highlighted:
-                    item.header?.pretitle?.toLowerCase().includes("popular") ||
-                    false,
+                    item.pretitle?.toLowerCase().includes("popular") || false,
             };
         })
         .filter((tier) => tier.name);
@@ -263,17 +258,17 @@ function pricing(parsed) {
  * @returns {Array} Team members
  */
 function team(parsed) {
     // A missing parsed object or a missing items array both mean "no members".
     const entries = parsed?.items || [];

     return entries.flatMap((item) => {
         // Both image fields are read from the first image of the item.
         const portrait = first(item.imgs);
         const member = {
             name: item.title || null,
             role: item.subtitle || null,
             department: item.pretitle || null,
             bio: item.paragraphs || [],
             image: portrait?.url || null,
             imageAlt: portrait?.alt || null,
             links: item.links || [],
         };

         // Items without a title produce no team member.
         return member.name ? [member] : [];
     });
 }
@@ -292,14 +287,14 @@ function gallery(parsed, options = {}) {
     const images = [];
     if (source === "main" || source === "all") {
-        const mainImages = parsed.groups?.main?.body?.imgs || [];
+        const mainImages = parsed?.imgs || [];
         images.push(...mainImages);
     }
     if (source === "items" || source === "all") {
-        const items = parsed.groups?.items || [];
+        const items = parsed?.items || [];
         items.forEach((item) => {
-            const itemImages = item.body?.imgs || [];
+            const itemImages = item.imgs || [];
             images.push(...itemImages);
         });
     }
@@ -315,26 +310,24 @@ function gallery(parsed, options = {}) {
  * Extract content in legacy Article class format
  * Used for backward compatibility with existing components
  *
- * This extractor transforms the new parser output into the exact format
+ * This extractor transforms the new flat parser output into the nested format
  * used by the legacy Article class, enabling drop-in replacement without
  * breaking existing components.
  *
- * @param {Object} parsed - Parsed content from parseContent()
- * @returns {Object} Legacy format { main, items }
+ * @param {Object} parsed - Parsed content from parseContent() (flat structure)
+ * @returns {Object} Legacy format { main, items } with nested header/body structure
  *
  * @example
  * const { parseContent, mappers } = require('@uniweb/semantic-parser');
- * const parsed = parseContent(doc, { pretitleLevel: 2, parseCodeAsJson: true });
+ * const parsed = parseContent(doc);
  * const legacy = mappers.extractors.legacy(parsed);
- * // Returns: { main: {...}, items: [...] }
+ * // Returns: { main: { header: {...}, body: {...} }, items: [...] }
  */
 function legacy(parsed) {
-    const groups = parsed.groups || {};
-    const transformGroup = (group) => {
-        if (!group) return null;
+    const transformToNested = (content) => {
+        if (!content) return null;
-        let imgs = group.body?.imgs || [];
+        let imgs = content.imgs || [];
         let banner = imgs.filter((item) => {
             return (item.role = "banner");
         })?.[0];
@@ -343,41 +336,41 @@ function legacy(parsed) {
         return {
             header: {
-                title: group.header?.title || "",
-                subtitle: group.header?.subtitle || "",
-                subtitle2: group.header?.subtitle2 || "",
-                pretitle: group.header?.pretitle || "",
+                title: content.title || "",
+                subtitle: content.subtitle || "",
+                subtitle2: content.subtitle2 || "",
+                pretitle: content.pretitle || "",
                 // Auto-fill description (legacy behavior)
                 description:
-                    group.header?.subtitle2 ||
-                    first(group.body?.paragraphs) ||
+                    content.subtitle2 ||
+                    first(content.paragraphs) ||
                     "",
-                alignment: group.header?.alignment || "",
+                alignment: content.alignment || "",
             },
             banner,
             body: {
-                paragraphs: group.body?.paragraphs || [],
-                headings: group.body?.headings || [],
+                paragraphs: content.paragraphs || [],
+                headings: content.headings || [],
                 imgs,
-                videos: group.body?.videos || [],
-                lists: group.body?.lists || [],
-                links: group.body?.links || [],
-                icons: group.body?.icons || [],
-                buttons: group.body?.buttons || [],
-                cards: group.body?.cards || [],
-                documents: group.body?.documents || [],
-                forms: group.body?.forms || [],
-                form: first(group.body?.forms) || null,
-                quotes: group.body?.quotes || [],
-                properties: group.body?.properties || {},
-                propertyBlocks: group.body?.propertyBlocks || [],
+                videos: content.videos || [],
+                lists: content.lists || [],
+                links: content.links || [],
+                icons: content.icons || [],
+                buttons: content.buttons || [],
+                cards: content.cards || [],
+                documents: content.documents || [],
+                forms: content.forms || [],
+                form: first(content.forms) || null,
+                quotes: content.quotes || [],
+                properties: content.properties || {},
+                propertyBlocks: content.propertyBlocks || [],
             },
         };
     };
     return {
-        main: transformGroup(groups.main),
-        items: (groups.items || []).map(transformGroup),
+        main: transformToNested(parsed),
+        items: (parsed?.items || []).map(transformToNested),
     };
 }

package/src/processors/groups.js CHANGED Viewed

@@ -1,42 +1,113 @@
/**
 * Flatten a group's nested structure to a flat object
 *
 * Header fields and body collections are copied onto a single level. A group
 * that lacks a `header` or `body` section is treated as if that section were
 * empty, so every field still receives its default value. Collections are
 * passed through by reference, not cloned.
 *
 * @param {Object} group Processed group with { header, body, metadata }
 * @returns {Object|null} Flat content object, or null when no group is given
 */
function flattenGroup(group) {
    if (!group) return null;

    // Fall back to empty sections instead of throwing on a partial group.
    const header = group.header || {};
    const body = group.body || {};

    return {
        title: header.title || '',
        pretitle: header.pretitle || '',
        subtitle: header.subtitle || '',
        subtitle2: header.subtitle2 || '',
        alignment: header.alignment || null,
        paragraphs: body.paragraphs || [],
        links: body.links || [],
        imgs: body.imgs || [],
        icons: body.icons || [],
        lists: body.lists || [],
        videos: body.videos || [],
        buttons: body.buttons || [],
        properties: body.properties || {},
        propertyBlocks: body.propertyBlocks || [],
        cards: body.cards || [],
        documents: body.documents || [],
        forms: body.forms || [],
        quotes: body.quotes || [],
        headings: body.headings || [],
    };
}
 /**
  * Build a flat content object in which every field holds its empty default.
  * A fresh object (with fresh arrays) is returned on each call so that callers
  * never share mutable state.
  * @returns {Object} Empty flat content object (without `items`)
  */
 function createEmptyContent() {
     return {
         title: '',
         pretitle: '',
         subtitle: '',
         subtitle2: '',
         alignment: null,
         paragraphs: [],
         links: [],
         imgs: [],
         icons: [],
         lists: [],
         videos: [],
         buttons: [],
         properties: {},
         propertyBlocks: [],
         cards: [],
         documents: [],
         forms: [],
         quotes: [],
         headings: [],
     };
 }

 /**
  * Transform a sequence into content groups with semantic structure
  *
  * The sequence is split with splitBySlices(), each resulting group is run
  * through processGroupContent(), and identifyMainContent() decides whether
  * the first processed group becomes the main content. The main content is
  * flattened onto the returned object itself; every remaining group is
  * flattened into `items`.
  *
  * @param {Array} sequence Flat sequence of elements
  * @param {Object} options Parsing options (currently not read by this function)
  * @returns {Object} Flat content object with items array
  */
 function processGroups(sequence, options = {}) {
     // Empty (or missing) content returns the flat empty structure
     if (!sequence?.length) {
         return { ...createEmptyContent(), items: [] };
     }

     const groups = splitBySlices(sequence);

     // Process each group's structure (still nested internally)
     const processedGroups = groups.map((group) => processGroupContent(group));

     // Determine main vs items: without main content every group is an item
     const hasMain = identifyMainContent(processedGroups);
     const mainGroup = hasMain ? processedGroups[0] : null;
     const itemGroups = hasMain ? processedGroups.slice(1) : processedGroups;

     // Flatten main content (or fall back to the empty flat structure)
     const flatMain = flattenGroup(mainGroup) || createEmptyContent();

     return {
         ...flatMain,
         items: itemGroups.map((group) => flattenGroup(group)),
     };
 }
 function splitBySlices(sequence) {

package/src/processors/byType.js DELETED Viewed

@@ -1,130 +0,0 @@
/**
 * Organize content elements by their type while preserving context
 *
 * Each element is shallow-copied with an added `context` property (built by
 * getElementContext) and pushed into the bucket matching its `type`:
 * headings, paragraphs, lists, dividers, or one of the image buckets keyed by
 * role. Elements of any other type are only counted in the type statistics.
 * Helper methods are attached to the result via addCollectionHelpers.
 *
 * @param {Array} sequence Flat sequence of elements
 * @returns {Object} Content organized by type
 */
function processByType(sequence) {
  const collections = {
    headings: [],
    paragraphs: [],
    images: {
      background: [],
      content: [],
      gallery: [],
      icon: [],
    },
    lists: [],
    dividers: [],
    metadata: {
      totalElements: sequence.length,
      dominantType: null,
      hasMedia: false,
    },
  };

  // Track type frequencies for metadata
  const typeFrequency = new Map();
  const hasOwn = Object.prototype.hasOwnProperty;

  sequence.forEach((element, index) => {
    // Track element type frequency
    typeFrequency.set(element.type, (typeFrequency.get(element.type) || 0) + 1);

    // Add context information
    const context = getElementContext(sequence, index);
    const enrichedElement = { ...element, context };

    // Process element based on type
    switch (element.type) {
      case "heading":
        collections.headings.push(enrichedElement);
        break;
      case "paragraph":
        collections.paragraphs.push(enrichedElement);
        break;
      case "image": {
        // Support both attrs.role and top-level role for backwards compatibility
        const role = element.attrs?.role || element.role || "content";
        // Only own keys count as existing buckets. A plain truthiness check
        // would mistake inherited members ("constructor", "toString",
        // "__proto__") for buckets and then fail on `.push`.
        if (!hasOwn.call(collections.images, role)) {
          // defineProperty creates a regular own data property even for the
          // "__proto__" key, where plain assignment would set the prototype.
          Object.defineProperty(collections.images, role, {
            value: [],
            writable: true,
            enumerable: true,
            configurable: true,
          });
        }
        collections.images[role].push(enrichedElement);
        collections.metadata.hasMedia = true;
        break;
      }
      case "list":
        collections.lists.push(enrichedElement);
        break;
      case "divider":
        collections.dividers.push(enrichedElement);
        break;
    }
  });

  // Calculate dominant type; on a tie the type encountered first wins
  let maxFrequency = 0;
  typeFrequency.forEach((frequency, type) => {
    if (frequency > maxFrequency) {
      maxFrequency = frequency;
      collections.metadata.dominantType = type;
    }
  });

  // Add helper methods
  addCollectionHelpers(collections);

  return collections;
}
/**
 * Describe the surroundings of one element within the sequence.
 *
 * @param {Array} sequence Flat sequence of elements
 * @param {number} position Index of the element inside `sequence`
 * @returns {Object} `{ position, previousElement, nextElement, nearestHeading }`;
 *   neighbours that do not exist and a missing heading are reported as null
 */
function getElementContext(sequence, position) {
  let previousElement = null;
  if (position > 0) {
    previousElement = sequence[position - 1];
  }

  let nextElement = null;
  if (position < sequence.length - 1) {
    nextElement = sequence[position + 1];
  }

  // Walk backwards from the element and stop at the first heading found
  let nearestHeading = null;
  let cursor = position - 1;
  while (cursor >= 0 && nearestHeading === null) {
    const candidate = sequence[cursor];
    if (candidate.type === "heading") {
      nearestHeading = candidate;
    }
    cursor -= 1;
  }

  return { position, previousElement, nextElement, nearestHeading };
}
/**
 * Attach query helpers to a collections object, mutating it in place.
 *
 * Both helpers read the collections through `this`, so they have to be
 * invoked as methods of the object they were attached to.
 */
function addCollectionHelpers(collections) {
  Object.assign(collections, {
    // Headings whose `level` strictly equals the requested level
    getHeadingsByLevel(level) {
      return this.headings.filter((heading) => heading.level === level);
    },

    // Paragraphs, images of every role and lists whose nearest preceding
    // heading passes `headingFilter`; elements without such a heading are
    // never returned
    getElementsByHeadingContext(headingFilter) {
      const imageElements = Object.values(this.images).flat();
      const candidates = [...this.paragraphs, ...imageElements, ...this.lists];

      return candidates.filter((candidate) => {
        const heading = candidate.context?.nearestHeading;
        return heading && headingFilter(heading);
      });
    },
  });
}
-export {
-  processByType
-};