@uniweb/semantic-parser 1.0.8 → 1.0.10

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/AGENTS.md CHANGED
@@ -52,35 +52,26 @@ const result = parseContent(doc);
52
52
  // }
53
53
  ```
54
54
 
55
- ### Content Group Structure
55
+ ### Content Output Structure
56
56
 
57
- Groups follow a specific structure defined in `processGroupContent()`:
57
+ The parser returns a flat content structure:
58
58
 
59
59
  ```js
60
60
  {
61
- header: {
62
- pretitle: '', // H3 before main title
63
- title: '', // Main heading (H1 or H2)
64
- subtitle: '' // Heading after main title
65
- },
66
- body: {
67
- imgs: [],
68
- icons: [],
69
- videos: [],
70
- paragraphs: [],
71
- links: [],
72
- lists: [],
73
- buttons: [],
74
- properties: [],
75
- propertyBlocks: [],
76
- cards: [],
77
- headings: []
78
- },
79
- banner: null, // Image with banner role or image before heading
80
- metadata: {
81
- level: null, // Heading level that started this group
82
- contentTypes: Set()
83
- }
61
+ title: '', // Main heading
62
+ pretitle: '', // Heading before main title
63
+ subtitle: '', // Heading after main title
64
+ paragraphs: [],
65
+ links: [],
66
+ imgs: [],
67
+ icons: [],
68
+ videos: [],
69
+ lists: [],
70
+ buttons: [],
71
+ data: {}, // Tagged code blocks (keyed by tag name)
72
+ cards: [],
73
+ headings: [],
74
+ items: [], // Child content groups
84
75
  }
85
76
  ```
86
77
 
@@ -102,6 +93,32 @@ The sequence processor identifies several special element types by inspecting pa
102
93
 
103
94
  These are extracted into dedicated element types for easier downstream processing.
104
95
 
96
+ ### Tagged Code Blocks
97
+
98
+ Code blocks tagged as `language:tag` are parsed, and the result is stored in the `data` object under the tag name:
99
+
100
+ ```markdown
101
+ ```json:nav-links
102
+ [{ "label": "Home", "href": "/" }]
103
+ ```
104
+
105
+ ```yaml:config
106
+ title: My Site
107
+ theme: dark
108
+ ```
109
+ ```
110
+
111
+ Results in:
112
+ ```js
113
+ content.data['nav-links'] = [{ label: "Home", href: "/" }]
114
+ content.data['config'] = { title: "My Site", theme: "dark" }
115
+ ```
116
+
117
+ **Parsing rules:**
118
+ - Tagged blocks with `json` language: parsed as JSON
119
+ - Tagged blocks with `yaml`/`yml` language: parsed as YAML
120
+ - Untagged blocks: not parsed; they remain as raw text in the sequence for display
121
+
105
122
  ### List Processing
106
123
 
107
124
  Lists maintain hierarchy through nested structure. The `processListItems()` function in sequence.js handles nested lists, while `processListContent()` in groups.js applies full group content processing to each list item, allowing lists to contain rich content (images, paragraphs, nested lists, etc.).
package/README.md CHANGED
@@ -4,11 +4,10 @@ A semantic parser for ProseMirror/TipTap content structures that helps bridge th
4
4
 
5
5
  ## What it Does
6
6
 
7
- The parser transforms rich text editor content (ProseMirror/TipTap) into structured, semantic groups that web components can easily consume. It provides three complementary views of your content:
7
+ The parser transforms rich text editor content (ProseMirror/TipTap) into structured, semantic groups that web components can easily consume. It provides two complementary views of your content:
8
8
 
9
- 1. **Sequence**: A flat, ordered list of all content elements
10
- 2. **Groups**: Content organized into semantic sections with identified main content
11
- 3. **ByType**: Elements categorized by type for easy filtering and queries
9
+ 1. **Sequence**: An ordered list of all content elements (for rendering in document order)
10
+ 2. **Groups**: Content organized into semantic sections (main content + items)
12
11
 
13
12
  ## Installation
14
13
 
@@ -41,16 +40,16 @@ const doc = {
41
40
  const result = parseContent(doc);
42
41
 
43
42
  // Access different views
44
- console.log(result.sequence); // Flat array of elements
45
- console.log(result.groups); // Semantic groups with main/items
46
- console.log(result.byType); // Elements organized by type
43
+ console.log(result.sequence); // Ordered array of elements
44
+ console.log(result.title); // Main content fields at top level
45
+ console.log(result.items); // Additional content groups
47
46
  ```
48
47
 
49
48
  ## Output Structure
50
49
 
51
50
  ### Sequence View
52
51
 
53
- A flat array of semantic elements preserving document order:
52
+ An ordered array of semantic elements preserving document order:
54
53
 
55
54
  ```js
56
55
  result.sequence = [
@@ -59,72 +58,37 @@ result.sequence = [
59
58
  ]
60
59
  ```
61
60
 
62
- ### Groups View
61
+ ### Content Structure
63
62
 
64
- Content organized into semantic groups:
63
+ Main content fields are at the top level. The `items` array contains additional content groups (created when headings appear after content), each with the same field structure:
65
64
 
66
65
  ```js
67
- result.groups = {
68
- main: {
69
- header: {
70
- pretitle: "", // H3 before main title
71
- title: "Welcome", // Main heading
72
- subtitle: "" // Heading after main title
73
- },
74
- body: {
75
- paragraphs: ["Get started today."],
76
- imgs: [],
77
- videos: [],
78
- links: [],
79
- lists: [],
80
- // ... more content types
81
- },
82
- banner: null, // Optional banner image
83
- metadata: { level: 1 }
84
- },
85
- items: [], // Additional content groups
86
- metadata: {
87
- dividerMode: false, // Using dividers vs headings
88
- groups: 0
89
- }
90
- }
91
- ```
92
-
93
- ### ByType View
66
+ result = {
67
+ // Main content fields
68
+ pretitle: "", // Heading before main title
69
+ title: "Welcome", // Main heading
70
+ subtitle: "", // Heading after main title
71
+ paragraphs: ["Get started today."],
72
+ imgs: [],
73
+ videos: [],
74
+ links: [],
75
+ lists: [],
76
+ icons: [],
77
+ buttons: [],
78
+ banner: null, // Optional banner image
79
+ // ... more content types
80
+
81
+ // Additional content groups (from headings after content)
82
+ items: [
83
+ { title: "Feature 1", paragraphs: [...], links: [...] },
84
+ { title: "Feature 2", paragraphs: [...], links: [...] }
85
+ ],
94
86
 
95
- Elements organized by type with context:
87
+ // Ordered sequence for document-order rendering
88
+ sequence: [...],
96
89
 
97
- ```js
98
- result.byType = {
99
- headings: [
100
- {
101
- type: "heading",
102
- level: 1,
103
- content: "Welcome",
104
- context: {
105
- position: 0,
106
- previousElement: null,
107
- nextElement: { type: "paragraph", ... },
108
- nearestHeading: null
109
- }
110
- }
111
- ],
112
- paragraphs: [ /* ... */ ],
113
- images: {
114
- background: [],
115
- content: [],
116
- gallery: [],
117
- icon: []
118
- },
119
- lists: [],
120
- metadata: {
121
- totalElements: 2,
122
- dominantType: "paragraph",
123
- hasMedia: false
124
- },
125
- // Helper methods
126
- getHeadingsByLevel(level),
127
- getElementsByHeadingContext(filter)
90
+ // Original document
91
+ raw: { type: "doc", content: [...] }
128
92
  }
129
93
  ```
130
94
 
@@ -133,45 +97,29 @@ result.byType = {
133
97
  ### Extracting Main Content
134
98
 
135
99
  ```js
136
- const { groups } = parseContent(doc);
100
+ const content = parseContent(doc);
137
101
 
138
- const title = groups.main.header.title;
139
- const description = groups.main.body.paragraphs.join(" ");
140
- const image = groups.main.banner?.url;
102
+ const title = content.title;
103
+ const description = content.paragraphs.join(" ");
104
+ const image = content.banner?.url;
141
105
  ```
142
106
 
143
107
  ### Processing Content Sections
144
108
 
145
109
  ```js
146
- const { groups } = parseContent(doc);
110
+ const content = parseContent(doc);
147
111
 
148
112
  // Main content
149
- console.log("Main:", groups.main.header.title);
113
+ console.log("Title:", content.title);
114
+ console.log("Description:", content.paragraphs);
150
115
 
151
- // Additional sections
152
- groups.items.forEach(item => {
153
- console.log("Section:", item.header.title);
154
- console.log("Content:", item.body.paragraphs);
116
+ // Additional content groups
117
+ content.items.forEach(item => {
118
+ console.log("Section:", item.title);
119
+ console.log("Content:", item.paragraphs);
155
120
  });
156
121
  ```
157
122
 
158
- ### Finding Specific Elements
159
-
160
- ```js
161
- const { byType } = parseContent(doc);
162
-
163
- // Get all H2 headings
164
- const subheadings = byType.getHeadingsByLevel(2);
165
-
166
- // Get all background images
167
- const backgrounds = byType.images.background;
168
-
169
- // Get content under specific headings
170
- const features = byType.getElementsByHeadingContext(
171
- h => h.content.includes("Features")
172
- );
173
- ```
174
-
175
123
  ### Sequential Processing
176
124
 
177
125
  ```js
@@ -203,17 +151,17 @@ Automatically transform content based on field types with context-aware behavior
203
151
  ```js
204
152
  const schema = {
205
153
  title: {
206
- path: "groups.main.header.title",
154
+ path: "title",
207
155
  type: "plaintext", // Auto-strips <strong>, <em>, etc.
208
156
  maxLength: 60 // Auto-truncates intelligently
209
157
  },
210
158
  excerpt: {
211
- path: "groups.main.body.paragraphs",
159
+ path: "paragraphs",
212
160
  type: "excerpt", // Auto-creates excerpt from paragraphs
213
161
  maxLength: 150
214
162
  },
215
163
  image: {
216
- path: "groups.main.body.imgs[0].url",
164
+ path: "imgs[0].url",
217
165
  type: "image",
218
166
  defaultValue: "/placeholder.jpg"
219
167
  }
@@ -259,15 +207,15 @@ Define custom mappings using schemas:
259
207
 
260
208
  ```js
261
209
  const schema = {
262
- brand: "groups.main.header.pretitle",
263
- title: "groups.main.header.title",
264
- subtitle: "groups.main.header.subtitle",
210
+ brand: "pretitle",
211
+ title: "title",
212
+ subtitle: "subtitle",
265
213
  image: {
266
- path: "groups.main.body.imgs[0].url",
214
+ path: "imgs[0].url",
267
215
  defaultValue: "/placeholder.jpg"
268
216
  },
269
217
  actions: {
270
- path: "groups.main.body.links",
218
+ path: "links",
271
219
  transform: links => links.map(l => ({ label: l.label, type: "primary" }))
272
220
  }
273
221
  };
package/docs/api.md CHANGED
@@ -118,51 +118,49 @@ A flat array of semantic elements extracted from the document tree.
118
118
 
119
119
  ### `groups`
120
120
 
121
- Content organized into semantic groups with identified main content and items.
121
+ Content organized into semantic groups with identified main content and items. The structure is flat: header and body fields are merged at the top level of each group.
122
122
 
123
123
  ```js
124
124
  {
125
125
  main: {
126
- header: {
127
- pretitle: "PRETITLE TEXT", // H3 before main title
128
- title: "Main Title", // First heading in group
129
- subtitle: "Subtitle" // Second heading in group
130
- },
131
- body: {
132
- paragraphs: ["paragraph text", ...],
133
- imgs: [
134
- { url: "...", caption: "...", alt: "..." }
135
- ],
136
- icons: ["<svg>...</svg>", ...],
137
- videos: [
138
- { src: "...", caption: "...", alt: "..." }
139
- ],
140
- links: [
141
- { href: "...", label: "..." }
142
- ],
143
- lists: [
144
- [/* processed list items */]
145
- ],
146
- buttons: [
147
- { content: "...", attrs: {...} }
148
- ],
149
- properties: [], // Code block content
150
- propertyBlocks: [], // Array of code blocks
151
- cards: [], // Not yet implemented
152
- headings: [] // Used in list items
153
- },
126
+ // Header fields (flat)
127
+ pretitle: "PRETITLE TEXT", // H3 before main title
128
+ title: "Main Title", // First heading in group
129
+ subtitle: "Subtitle", // Second heading in group
130
+
131
+ // Body fields (flat)
132
+ paragraphs: ["paragraph text", ...],
133
+ imgs: [
134
+ { url: "...", caption: "...", alt: "..." }
135
+ ],
136
+ icons: ["<svg>...</svg>", ...],
137
+ videos: [
138
+ { src: "...", caption: "...", alt: "..." }
139
+ ],
140
+ links: [
141
+ { href: "...", label: "..." }
142
+ ],
143
+ lists: [
144
+ [/* processed list items */]
145
+ ],
146
+ buttons: [
147
+ { content: "...", attrs: {...} }
148
+ ],
149
+ properties: [], // Code block content
150
+ propertyBlocks: [], // Array of code blocks
151
+ cards: [], // Not yet implemented
152
+ headings: [], // Used in list items
153
+
154
+ // Banner (flat)
154
155
  banner: {
155
156
  url: "path/to/banner.jpg",
156
157
  caption: "Banner caption",
157
158
  alt: "Banner alt text"
158
- } | null,
159
- metadata: {
160
- level: 1, // Heading level that started this group
161
- contentTypes: {} // Set of content types in group
162
- }
159
+ } | null
163
160
  },
164
161
  items: [
165
- // Array of groups with same structure as main
162
+ // Array of groups with same flat structure as main
163
+ // { title, pretitle, subtitle, paragraphs, imgs, ... }
166
164
  ],
167
165
  metadata: {
168
166
  dividerMode: false, // Whether dividers were used for grouping
@@ -268,14 +266,14 @@ const result = parseContent(doc);
268
266
  ```js
269
267
  const { groups } = parseContent(doc);
270
268
 
271
- // Access main content
272
- console.log(groups.main.header.title);
273
- console.log(groups.main.body.paragraphs);
269
+ // Access main content (flat structure)
270
+ console.log(groups.main.title);
271
+ console.log(groups.main.paragraphs);
274
272
 
275
273
  // Iterate through content items
276
274
  groups.items.forEach(item => {
277
- console.log(item.header.title);
278
- console.log(item.body.paragraphs);
275
+ console.log(item.title);
276
+ console.log(item.paragraphs);
279
277
  });
280
278
  ```
281
279