@uniweb/semantic-parser 1.0.9 → 1.0.11
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/AGENTS.md +78 -27
- package/README.md +15 -12
- package/docs/entity-consolidation.md +467 -0
- package/package.json +4 -1
- package/src/mappers/extractors.js +40 -14
- package/src/processors/groups.js +47 -41
- package/src/processors/sequence.js +59 -11
package/AGENTS.md
CHANGED
|
@@ -52,35 +52,52 @@ const result = parseContent(doc);
|
|
|
52
52
|
// }
|
|
53
53
|
```
|
|
54
54
|
|
|
55
|
-
### Content
|
|
55
|
+
### Content Output Structure
|
|
56
56
|
|
|
57
|
-
|
|
57
|
+
The parser returns a flat content structure:
|
|
58
58
|
|
|
59
59
|
```js
|
|
60
60
|
{
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
61
|
+
title: '', // Main heading
|
|
62
|
+
pretitle: '', // Heading before main title
|
|
63
|
+
subtitle: '', // Heading after main title
|
|
64
|
+
subtitle2: '', // Third heading level
|
|
65
|
+
paragraphs: [],
|
|
66
|
+
links: [], // All link-like entities (including buttons, documents)
|
|
67
|
+
imgs: [],
|
|
68
|
+
icons: [],
|
|
69
|
+
videos: [],
|
|
70
|
+
lists: [],
|
|
71
|
+
quotes: [],
|
|
72
|
+
data: {}, // Structured data (tagged code blocks, forms, cards)
|
|
73
|
+
headings: [], // Overflow headings after title/subtitle/subtitle2
|
|
74
|
+
items: [], // Child content groups (same structure recursively)
|
|
75
|
+
}
|
|
76
|
+
```
|
|
77
|
+
|
|
78
|
+
### Link Roles
|
|
79
|
+
|
|
80
|
+
Links include buttons and documents, distinguished by `role`:
|
|
81
|
+
|
|
82
|
+
```js
|
|
83
|
+
links: [
|
|
84
|
+
{ href: "/page", label: "Learn More", role: "link" },
|
|
85
|
+
{ href: "/action", label: "Get Started", role: "button", variant: "primary" },
|
|
86
|
+
{ href: "/file.pdf", label: "Download", role: "document", download: true },
|
|
87
|
+
]
|
|
88
|
+
```
|
|
89
|
+
|
|
90
|
+
### Structured Data
|
|
91
|
+
|
|
92
|
+
The `data` object holds all structured content:
|
|
93
|
+
|
|
94
|
+
```js
|
|
95
|
+
data: {
|
|
96
|
+
"nav-links": [...], // From ```json:nav-links or ```yaml:nav-links
|
|
97
|
+
"config": {...}, // From ```yaml:config
|
|
98
|
+
"form": {...}, // From FormBlock editor widget
|
|
99
|
+
"person": [...], // From card-group with cardType="person"
|
|
100
|
+
"event": [...] // From card-group with cardType="event"
|
|
84
101
|
}
|
|
85
102
|
```
|
|
86
103
|
|
|
@@ -97,10 +114,44 @@ The sequence processor identifies several special element types by inspecting pa
|
|
|
97
114
|
- **Links**: Paragraphs containing only a single link mark
|
|
98
115
|
- **Images**: Paragraphs with single image (role: 'image' or 'banner')
|
|
99
116
|
- **Icons**: Paragraphs with single image (role: 'icon')
|
|
100
|
-
- **Buttons**:
|
|
117
|
+
- **Buttons**: Editor `button` nodes → mapped to links with `role: "button"`
|
|
101
118
|
- **Videos**: Paragraphs with single image (role: 'video')
|
|
102
119
|
|
|
103
|
-
|
|
120
|
+
### Editor Node Mappings
|
|
121
|
+
|
|
122
|
+
Editor-specific nodes are mapped to standard entities:
|
|
123
|
+
- `button` node → `links[]` with `role: "button"` and `variant` attribute
|
|
124
|
+
- `FormBlock` → `data.form`
|
|
125
|
+
- `card-group` → `data[cardType]` arrays (e.g., `data.person`, `data.event`)
|
|
126
|
+
- `document-group` → `links[]` with `role: "document"` and `download: true`
|
|
127
|
+
|
|
128
|
+
See `docs/entity-consolidation.md` for complete mapping documentation.
|
|
129
|
+
|
|
130
|
+
### Tagged Code Blocks
|
|
131
|
+
|
|
132
|
+
Code blocks with tags route parsed data to the `data` object:
|
|
133
|
+
|
|
134
|
+
```markdown
|
|
135
|
+
```json:nav-links
|
|
136
|
+
[{ "label": "Home", "href": "/" }]
|
|
137
|
+
```
|
|
138
|
+
|
|
139
|
+
```yaml:config
|
|
140
|
+
title: My Site
|
|
141
|
+
theme: dark
|
|
142
|
+
```
|
|
143
|
+
```
|
|
144
|
+
|
|
145
|
+
Results in:
|
|
146
|
+
```js
|
|
147
|
+
content.data['nav-links'] = [{ label: "Home", href: "/" }]
|
|
148
|
+
content.data['config'] = { title: "My Site", theme: "dark" }
|
|
149
|
+
```
|
|
150
|
+
|
|
151
|
+
**Parsing rules:**
|
|
152
|
+
- Tagged blocks with `json` language: parsed as JSON
|
|
153
|
+
- Tagged blocks with `yaml`/`yml` language: parsed as YAML
|
|
154
|
+
- Untagged blocks: not parsed (stay as raw text in sequence for display)
|
|
104
155
|
|
|
105
156
|
### List Processing
|
|
106
157
|
|
package/README.md
CHANGED
|
@@ -60,25 +60,28 @@ result.sequence = [
|
|
|
60
60
|
|
|
61
61
|
### Content Structure
|
|
62
62
|
|
|
63
|
-
Main content fields are at the top level. The `items` array contains additional content groups (
|
|
63
|
+
Main content fields are at the top level. The `items` array contains additional content groups (created when headings appear after content), each with the same field structure:
|
|
64
64
|
|
|
65
65
|
```js
|
|
66
66
|
result = {
|
|
67
|
-
//
|
|
68
|
-
pretitle: "", //
|
|
69
|
-
title: "Welcome", // Main heading
|
|
70
|
-
subtitle: "", //
|
|
67
|
+
// Header fields (from headings)
|
|
68
|
+
pretitle: "", // Heading before main title
|
|
69
|
+
title: "Welcome", // Main heading
|
|
70
|
+
subtitle: "", // Heading after main title
|
|
71
|
+
subtitle2: "", // Third heading level
|
|
72
|
+
|
|
73
|
+
// Body fields
|
|
71
74
|
paragraphs: ["Get started today."],
|
|
75
|
+
links: [], // All links (including buttons, documents)
|
|
72
76
|
imgs: [],
|
|
73
77
|
videos: [],
|
|
74
|
-
links: [],
|
|
75
|
-
lists: [],
|
|
76
78
|
icons: [],
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
//
|
|
79
|
+
lists: [],
|
|
80
|
+
quotes: [],
|
|
81
|
+
data: {}, // Structured data (tagged code blocks, forms, cards)
|
|
82
|
+
headings: [], // Overflow headings after title/subtitle/subtitle2
|
|
80
83
|
|
|
81
|
-
// Additional content groups (
|
|
84
|
+
// Additional content groups (from headings after content)
|
|
82
85
|
items: [
|
|
83
86
|
{ title: "Feature 1", paragraphs: [...], links: [...] },
|
|
84
87
|
{ title: "Feature 2", paragraphs: [...], links: [...] }
|
|
@@ -113,7 +116,7 @@ const content = parseContent(doc);
|
|
|
113
116
|
console.log("Title:", content.title);
|
|
114
117
|
console.log("Description:", content.paragraphs);
|
|
115
118
|
|
|
116
|
-
// Additional
|
|
119
|
+
// Additional content groups
|
|
117
120
|
content.items.forEach(item => {
|
|
118
121
|
console.log("Section:", item.title);
|
|
119
122
|
console.log("Content:", item.paragraphs);
|
|
@@ -0,0 +1,467 @@
|
|
|
1
|
+
# Semantic Parser Entity Consolidation
|
|
2
|
+
|
|
3
|
+
This document defines the standard semantic entities output by the parser and how editor-specific node types map to them.
|
|
4
|
+
|
|
5
|
+
## Design Principle
|
|
6
|
+
|
|
7
|
+
**Editor nodes are authoring conveniences → Parser outputs standardized semantic entities**
|
|
8
|
+
|
|
9
|
+
The semantic parser accepts ProseMirror/TipTap documents from two sources:
|
|
10
|
+
1. **File-based markdown** via `@uniweb/content-reader`
|
|
11
|
+
2. **Visual editor** via TipTap with custom node types
|
|
12
|
+
|
|
13
|
+
Both sources must produce the same standardized output. Editor-specific node types (like `card-group`, `FormBlock`, `button` node) are conveniences that map to standard entities.
|
|
14
|
+
|
|
15
|
+
---
|
|
16
|
+
|
|
17
|
+
## Standard Entity Set
|
|
18
|
+
|
|
19
|
+
After consolidation, the parser outputs this flat structure:
|
|
20
|
+
|
|
21
|
+
```js
|
|
22
|
+
{
|
|
23
|
+
// Header fields (from headings)
|
|
24
|
+
title: '',
|
|
25
|
+
pretitle: '',
|
|
26
|
+
subtitle: '',
|
|
27
|
+
subtitle2: '',
|
|
28
|
+
|
|
29
|
+
// Body fields
|
|
30
|
+
paragraphs: [], // Text blocks with inline HTML formatting
|
|
31
|
+
links: [], // All link-like entities (buttons, documents, nav links)
|
|
32
|
+
imgs: [], // All images (with role distinguishing purpose)
|
|
33
|
+
videos: [], // Video embeds
|
|
34
|
+
icons: [], // Standalone icons
|
|
35
|
+
lists: [], // Bullet/ordered lists (recursive structure)
|
|
36
|
+
quotes: [], // Blockquotes (recursive structure)
|
|
37
|
+
data: {}, // Structured data (tagged code blocks, forms, cards)
|
|
38
|
+
headings: [], // Overflow headings after title/subtitle/subtitle2
|
|
39
|
+
|
|
40
|
+
items: [], // Semantic groups (same structure recursively)
|
|
41
|
+
}
|
|
42
|
+
```
|
|
43
|
+
|
|
44
|
+
### Removed Fields
|
|
45
|
+
|
|
46
|
+
| Field | Status | Reason |
|
|
47
|
+
|-------|--------|--------|
|
|
48
|
+
| `alignment` | **Deprecated** | Editor-only concept, not expressible in markdown |
|
|
49
|
+
| `buttons` | **Merged into `links`** | Buttons are styled links |
|
|
50
|
+
| `cards` | **Merged into `data`** | Structured data with schema tag |
|
|
51
|
+
| `documents` | **Merged into `links`** | Documents are downloadable links |
|
|
52
|
+
| `forms` | **Merged into `data`** | Structured data with `form` tag |
|
|
53
|
+
|
|
54
|
+
---
|
|
55
|
+
|
|
56
|
+
## Entity Specifications
|
|
57
|
+
|
|
58
|
+
### Links
|
|
59
|
+
|
|
60
|
+
All link-like content merges into the `links` array. The `role` attribute distinguishes behavior.
|
|
61
|
+
|
|
62
|
+
```js
|
|
63
|
+
{
|
|
64
|
+
href: "/contact",
|
|
65
|
+
label: "Contact Us",
|
|
66
|
+
|
|
67
|
+
// Role distinguishes link type
|
|
68
|
+
role: "link", // Default: standard hyperlink
|
|
69
|
+
| "button" // Call-to-action button
|
|
70
|
+
| "button-primary" // Primary CTA
|
|
71
|
+
| "button-outline" // Outline style button
|
|
72
|
+
| "nav-link" // Navigation link
|
|
73
|
+
| "footer-link" // Footer navigation
|
|
74
|
+
| "document" // Downloadable file
|
|
75
|
+
|
|
76
|
+
// Button-specific attributes (when role is "button" or "button-*")
|
|
77
|
+
variant: "primary" | "secondary" | "outline" | "ghost",
|
|
78
|
+
size: "sm" | "md" | "lg",
|
|
79
|
+
icon: "icon-name",
|
|
80
|
+
|
|
81
|
+
// Link behavior
|
|
82
|
+
target: "_blank" | "_self",
|
|
83
|
+
rel: "noopener noreferrer",
|
|
84
|
+
download: true | "filename.pdf",
|
|
85
|
+
}
|
|
86
|
+
```
|
|
87
|
+
|
|
88
|
+
**Markdown syntax:**
|
|
89
|
+
```markdown
|
|
90
|
+
[Standard link](/page)
|
|
91
|
+
[Button link](button:/action){variant=primary}
|
|
92
|
+
[Download](report.pdf){download}
|
|
93
|
+
```
|
|
94
|
+
|
|
95
|
+
### Images
|
|
96
|
+
|
|
97
|
+
All image content uses the `imgs` array. The `role` attribute distinguishes purpose.
|
|
98
|
+
|
|
99
|
+
```js
|
|
100
|
+
{
|
|
101
|
+
url: "/images/hero.jpg",
|
|
102
|
+
alt: "Hero image",
|
|
103
|
+
caption: "Optional caption",
|
|
104
|
+
|
|
105
|
+
// Role distinguishes image purpose
|
|
106
|
+
role: "image", // Default: content image
|
|
107
|
+
| "icon" // Small icon/logo
|
|
108
|
+
| "background" // Section background
|
|
109
|
+
| "gallery" // Gallery item
|
|
110
|
+
| "banner" // Hero/banner image
|
|
111
|
+
|
|
112
|
+
// Layout attributes
|
|
113
|
+
direction: "left" | "right" | "center",
|
|
114
|
+
size: "basic" | "lg" | "full",
|
|
115
|
+
|
|
116
|
+
// Styling
|
|
117
|
+
filter: "grayscale" | "blur",
|
|
118
|
+
theme: "light" | "dark",
|
|
119
|
+
|
|
120
|
+
// Link wrapper (clickable image)
|
|
121
|
+
href: "/link-target",
|
|
122
|
+
}
|
|
123
|
+
```
|
|
124
|
+
|
|
125
|
+
### Data (Structured Content)
|
|
126
|
+
|
|
127
|
+
The `data` object holds all structured content from tagged code blocks and editor widgets.
|
|
128
|
+
|
|
129
|
+
```js
|
|
130
|
+
{
|
|
131
|
+
// From tagged code blocks
|
|
132
|
+
"form": { fields: [...], submitLabel: "Send" },
|
|
133
|
+
"nav-links": [{ label: "Home", href: "/" }],
|
|
134
|
+
"config": { theme: "dark" },
|
|
135
|
+
|
|
136
|
+
// From editor card widgets (mapped by type)
|
|
137
|
+
"person": [
|
|
138
|
+
{ name: "John", title: "CEO", ... },
|
|
139
|
+
{ name: "Jane", title: "CTO", ... },
|
|
140
|
+
],
|
|
141
|
+
"event": [
|
|
142
|
+
{ title: "Launch Party", date: "2024-01-15", location: "NYC", ... },
|
|
143
|
+
],
|
|
144
|
+
}
|
|
145
|
+
```
|
|
146
|
+
|
|
147
|
+
**Markdown syntax for structured data:**
|
|
148
|
+
```markdown
|
|
149
|
+
```yaml:form
|
|
150
|
+
fields:
|
|
151
|
+
- name: email
|
|
152
|
+
type: email
|
|
153
|
+
required: true
|
|
154
|
+
submitLabel: Subscribe
|
|
155
|
+
```
|
|
156
|
+
|
|
157
|
+
```json:nav-links
|
|
158
|
+
[{ "label": "Home", "href": "/" }]
|
|
159
|
+
```
|
|
160
|
+
```
|
|
161
|
+
|
|
162
|
+
---
|
|
163
|
+
|
|
164
|
+
## Editor Node Mappings
|
|
165
|
+
|
|
166
|
+
This section documents how TipTap/editor-specific nodes map to standard entities.
|
|
167
|
+
|
|
168
|
+
### `button` Node → `links[]`
|
|
169
|
+
|
|
170
|
+
**Editor input:**
|
|
171
|
+
```js
|
|
172
|
+
{
|
|
173
|
+
type: "button",
|
|
174
|
+
content: [{ type: "text", text: "Click me" }],
|
|
175
|
+
attrs: {
|
|
176
|
+
href: "/action",
|
|
177
|
+
variant: "primary",
|
|
178
|
+
size: "lg",
|
|
179
|
+
icon: "arrow-right"
|
|
180
|
+
}
|
|
181
|
+
}
|
|
182
|
+
```
|
|
183
|
+
|
|
184
|
+
**Standard output:**
|
|
185
|
+
```js
|
|
186
|
+
links: [{
|
|
187
|
+
href: "/action",
|
|
188
|
+
label: "Click me",
|
|
189
|
+
role: "button",
|
|
190
|
+
variant: "primary",
|
|
191
|
+
size: "lg",
|
|
192
|
+
icon: "arrow-right"
|
|
193
|
+
}]
|
|
194
|
+
```
|
|
195
|
+
|
|
196
|
+
### `FormBlock` Node → `data.form`
|
|
197
|
+
|
|
198
|
+
**Editor input:**
|
|
199
|
+
```js
|
|
200
|
+
{
|
|
201
|
+
type: "FormBlock",
|
|
202
|
+
attrs: {
|
|
203
|
+
data: {
|
|
204
|
+
fields: [{ name: "email", type: "email" }],
|
|
205
|
+
submitLabel: "Subscribe"
|
|
206
|
+
}
|
|
207
|
+
}
|
|
208
|
+
}
|
|
209
|
+
```
|
|
210
|
+
|
|
211
|
+
**Standard output:**
|
|
212
|
+
```js
|
|
213
|
+
data: {
|
|
214
|
+
form: {
|
|
215
|
+
fields: [{ name: "email", type: "email" }],
|
|
216
|
+
submitLabel: "Subscribe"
|
|
217
|
+
}
|
|
218
|
+
}
|
|
219
|
+
```
|
|
220
|
+
|
|
221
|
+
### `card-group` Node → `data[cardType]`
|
|
222
|
+
|
|
223
|
+
Cards are editor widgets for structured entities like people, events, addresses. Each card type becomes a key in `data`, with an array of all cards of that type. This follows the same pattern as tagged code blocks.
|
|
224
|
+
|
|
225
|
+
**Editor input:**
|
|
226
|
+
```js
|
|
227
|
+
{
|
|
228
|
+
type: "card-group",
|
|
229
|
+
content: [
|
|
230
|
+
{
|
|
231
|
+
type: "card",
|
|
232
|
+
attrs: {
|
|
233
|
+
cardType: "person",
|
|
234
|
+
title: "Jane Doe",
|
|
235
|
+
subtitle: "CEO",
|
|
236
|
+
coverImg: { src: "/jane.jpg" },
|
|
237
|
+
address: '{"city": "NYC"}',
|
|
238
|
+
icon: { svg: "..." }
|
|
239
|
+
}
|
|
240
|
+
},
|
|
241
|
+
{
|
|
242
|
+
type: "card",
|
|
243
|
+
attrs: {
|
|
244
|
+
cardType: "person",
|
|
245
|
+
title: "John Smith",
|
|
246
|
+
subtitle: "CTO",
|
|
247
|
+
coverImg: { src: "/john.jpg" }
|
|
248
|
+
}
|
|
249
|
+
},
|
|
250
|
+
{
|
|
251
|
+
type: "card",
|
|
252
|
+
attrs: {
|
|
253
|
+
cardType: "event",
|
|
254
|
+
title: "Launch Party",
|
|
255
|
+
date: "2024-03-15",
|
|
256
|
+
location: "San Francisco"
|
|
257
|
+
}
|
|
258
|
+
}
|
|
259
|
+
]
|
|
260
|
+
}
|
|
261
|
+
```
|
|
262
|
+
|
|
263
|
+
**Standard output:**
|
|
264
|
+
```js
|
|
265
|
+
data: {
|
|
266
|
+
person: [
|
|
267
|
+
{
|
|
268
|
+
title: "Jane Doe",
|
|
269
|
+
subtitle: "CEO",
|
|
270
|
+
coverImg: "/jane.jpg",
|
|
271
|
+
address: { city: "NYC" },
|
|
272
|
+
icon: { svg: "..." }
|
|
273
|
+
},
|
|
274
|
+
{
|
|
275
|
+
title: "John Smith",
|
|
276
|
+
subtitle: "CTO",
|
|
277
|
+
coverImg: "/john.jpg"
|
|
278
|
+
}
|
|
279
|
+
],
|
|
280
|
+
event: [
|
|
281
|
+
{
|
|
282
|
+
title: "Launch Party",
|
|
283
|
+
date: "2024-03-15",
|
|
284
|
+
location: "San Francisco"
|
|
285
|
+
}
|
|
286
|
+
]
|
|
287
|
+
}
|
|
288
|
+
```
|
|
289
|
+
|
|
290
|
+
**Accessing cards by type:**
|
|
291
|
+
```js
|
|
292
|
+
// Get all person cards
|
|
293
|
+
const people = content.data.person || [];
|
|
294
|
+
|
|
295
|
+
// Get all event cards
|
|
296
|
+
const events = content.data.event || [];
|
|
297
|
+
```
|
|
298
|
+
|
|
299
|
+
**Card schemas:**
|
|
300
|
+
| Schema | Common Fields |
|
|
301
|
+
|--------|---------------|
|
|
302
|
+
| `person` | title (name), subtitle (role), coverImg (photo), address |
|
|
303
|
+
| `event` | title, date, location, description |
|
|
304
|
+
| `address` | street, city, state, country, postal |
|
|
305
|
+
| `document` | title, href, coverImg (preview), fileType |
|
|
306
|
+
|
|
307
|
+
### `document-group` Node → `links[]`
|
|
308
|
+
|
|
309
|
+
Documents are downloadable files. They map to links with `role: "document"`.
|
|
310
|
+
|
|
311
|
+
**Editor input:**
|
|
312
|
+
```js
|
|
313
|
+
{
|
|
314
|
+
type: "document-group",
|
|
315
|
+
content: [
|
|
316
|
+
{
|
|
317
|
+
type: "document",
|
|
318
|
+
attrs: {
|
|
319
|
+
title: "Annual Report",
|
|
320
|
+
src: "/reports/annual-2024.pdf",
|
|
321
|
+
coverImg: { src: "/preview.jpg" }
|
|
322
|
+
}
|
|
323
|
+
}
|
|
324
|
+
]
|
|
325
|
+
}
|
|
326
|
+
```
|
|
327
|
+
|
|
328
|
+
**Standard output:**
|
|
329
|
+
```js
|
|
330
|
+
links: [{
|
|
331
|
+
href: "/reports/annual-2024.pdf",
|
|
332
|
+
label: "Annual Report",
|
|
333
|
+
role: "document",
|
|
334
|
+
download: true,
|
|
335
|
+
preview: "/preview.jpg"
|
|
336
|
+
}]
|
|
337
|
+
```
|
|
338
|
+
|
|
339
|
+
---
|
|
340
|
+
|
|
341
|
+
## Deprecation: `alignment`
|
|
342
|
+
|
|
343
|
+
The `alignment` field was extracted from heading's `textAlign` attribute in the editor. This is an editor-specific styling concern that:
|
|
344
|
+
- Cannot be expressed in file-based markdown
|
|
345
|
+
- Is a presentation concern, not semantic content
|
|
346
|
+
- Should be handled by component styling, not content structure
|
|
347
|
+
|
|
348
|
+
**Migration:** Components relying on `content.alignment` should:
|
|
349
|
+
1. Use CSS/Tailwind for text alignment
|
|
350
|
+
2. Or accept alignment as a component `param` in frontmatter
|
|
351
|
+
|
|
352
|
+
---
|
|
353
|
+
|
|
354
|
+
## Migration Path
|
|
355
|
+
|
|
356
|
+
### Phase 1: Add Mappings (Non-Breaking)
|
|
357
|
+
|
|
358
|
+
1. Continue outputting legacy fields (`buttons`, `cards`, `documents`, `forms`, `alignment`)
|
|
359
|
+
2. Also populate new locations (`links` for buttons/documents, `data` for cards/forms)
|
|
360
|
+
3. Components can migrate gradually
|
|
361
|
+
|
|
362
|
+
### Phase 2: Deprecation Warnings
|
|
363
|
+
|
|
364
|
+
1. Log warnings when legacy fields are accessed
|
|
365
|
+
2. Document migration for each field
|
|
366
|
+
3. Provide codemod or migration script
|
|
367
|
+
|
|
368
|
+
### Phase 3: Remove Legacy Fields
|
|
369
|
+
|
|
370
|
+
1. Remove `buttons`, `cards`, `documents`, `forms`, `alignment` from output
|
|
371
|
+
2. Update all components to use new structure
|
|
372
|
+
3. Update documentation
|
|
373
|
+
|
|
374
|
+
---
|
|
375
|
+
|
|
376
|
+
## Backwards Compatibility
|
|
377
|
+
|
|
378
|
+
During migration, the parser can provide a compatibility layer:
|
|
379
|
+
|
|
380
|
+
```js
|
|
381
|
+
// Parser option
|
|
382
|
+
const content = parse(doc, {
|
|
383
|
+
legacyFields: true // Include deprecated fields
|
|
384
|
+
});
|
|
385
|
+
|
|
386
|
+
// Or via getter that warns
|
|
387
|
+
Object.defineProperty(content, 'buttons', {
|
|
388
|
+
get() {
|
|
389
|
+
console.warn('content.buttons is deprecated, use content.links with role="button"');
|
|
390
|
+
return content.links.filter(l => l.role?.startsWith('button'));
|
|
391
|
+
}
|
|
392
|
+
});
|
|
393
|
+
```
|
|
394
|
+
|
|
395
|
+
---
|
|
396
|
+
|
|
397
|
+
## Component Migration Examples
|
|
398
|
+
|
|
399
|
+
### Before: Using `buttons`
|
|
400
|
+
|
|
401
|
+
```jsx
|
|
402
|
+
function CTA({ content }) {
|
|
403
|
+
const { links, buttons } = content;
|
|
404
|
+
return (
|
|
405
|
+
<div>
|
|
406
|
+
{links.map(link => <a href={link.href}>{link.label}</a>)}
|
|
407
|
+
{buttons.map(btn => <button>{btn.content}</button>)}
|
|
408
|
+
</div>
|
|
409
|
+
);
|
|
410
|
+
}
|
|
411
|
+
```
|
|
412
|
+
|
|
413
|
+
### After: Unified `links`
|
|
414
|
+
|
|
415
|
+
```jsx
|
|
416
|
+
function CTA({ content }) {
|
|
417
|
+
const { links } = content;
|
|
418
|
+
const buttons = links.filter(l => l.role?.startsWith('button'));
|
|
419
|
+
const plainLinks = links.filter(l => !l.role?.startsWith('button'));
|
|
420
|
+
|
|
421
|
+
return (
|
|
422
|
+
<div>
|
|
423
|
+
{plainLinks.map(link => <a href={link.href}>{link.label}</a>)}
|
|
424
|
+
{buttons.map(btn => (
|
|
425
|
+
<a href={btn.href} className={`btn btn-${btn.variant}`}>
|
|
426
|
+
{btn.label}
|
|
427
|
+
</a>
|
|
428
|
+
))}
|
|
429
|
+
</div>
|
|
430
|
+
);
|
|
431
|
+
}
|
|
432
|
+
```
|
|
433
|
+
|
|
434
|
+
### Or: Role-based rendering
|
|
435
|
+
|
|
436
|
+
```jsx
|
|
437
|
+
function CTA({ content }) {
|
|
438
|
+
return (
|
|
439
|
+
<div>
|
|
440
|
+
{content.links.map(link => {
|
|
441
|
+
if (link.role?.startsWith('button')) {
|
|
442
|
+
return <Button variant={link.variant}>{link.label}</Button>;
|
|
443
|
+
}
|
|
444
|
+
if (link.role === 'document') {
|
|
445
|
+
return <DownloadLink href={link.href}>{link.label}</DownloadLink>;
|
|
446
|
+
}
|
|
447
|
+
return <a href={link.href}>{link.label}</a>;
|
|
448
|
+
})}
|
|
449
|
+
</div>
|
|
450
|
+
);
|
|
451
|
+
}
|
|
452
|
+
```
|
|
453
|
+
|
|
454
|
+
---
|
|
455
|
+
|
|
456
|
+
## Implementation Checklist
|
|
457
|
+
|
|
458
|
+
- [ ] Update `processGroupContent` in `groups.js` to map button → links
|
|
459
|
+
- [ ] Update `processGroupContent` to map card-group → data[cardType]
|
|
460
|
+
- [ ] Update `processGroupContent` to map document-group → links
|
|
461
|
+
- [ ] Update `processGroupContent` to map FormBlock → data.form
|
|
462
|
+
- [ ] Remove `alignment` from header extraction
|
|
463
|
+
- [ ] Add `legacyFields` option for backwards compatibility
|
|
464
|
+
- [ ] Update `flattenGroup` to use new structure
|
|
465
|
+
- [ ] Update tests for new entity structure
|
|
466
|
+
- [ ] Update AGENTS.md and README.md
|
|
467
|
+
- [ ] Create migration guide for components
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@uniweb/semantic-parser",
|
|
3
|
-
"version": "1.0.
|
|
3
|
+
"version": "1.0.11",
|
|
4
4
|
"description": "Semantic parser for ProseMirror/TipTap content structures",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"main": "./src/index.js",
|
|
@@ -33,6 +33,9 @@
|
|
|
33
33
|
"doc": "docs",
|
|
34
34
|
"test": "tests"
|
|
35
35
|
},
|
|
36
|
+
"dependencies": {
|
|
37
|
+
"yaml": "^2.8.2"
|
|
38
|
+
},
|
|
36
39
|
"scripts": {
|
|
37
40
|
"test": "NODE_OPTIONS=--experimental-vm-modules jest",
|
|
38
41
|
"test-report": "NODE_OPTIONS=--experimental-vm-modules jest --json > test-results.json 2>&1",
|
|
@@ -16,6 +16,10 @@ import { first, joinParagraphs } from "./helpers.js";
|
|
|
16
16
|
* @returns {Object} Hero component data
|
|
17
17
|
*/
|
|
18
18
|
function hero(parsed) {
|
|
19
|
+
const links = parsed?.links || [];
|
|
20
|
+
const buttonLink = links.find(l => l.role?.startsWith('button'));
|
|
21
|
+
const plainLink = links.find(l => !l.role?.startsWith('button'));
|
|
22
|
+
|
|
19
23
|
return {
|
|
20
24
|
title: parsed?.title || null,
|
|
21
25
|
subtitle: parsed?.subtitle || null,
|
|
@@ -24,8 +28,7 @@ function hero(parsed) {
|
|
|
24
28
|
image: first(parsed?.imgs)?.url || null,
|
|
25
29
|
imageAlt: first(parsed?.imgs)?.alt || null,
|
|
26
30
|
banner: null, // Banner detection would need to be added separately
|
|
27
|
-
cta:
|
|
28
|
-
button: first(parsed?.buttons) || null,
|
|
31
|
+
cta: buttonLink || plainLink || null,
|
|
29
32
|
};
|
|
30
33
|
}
|
|
31
34
|
|
|
@@ -45,6 +48,10 @@ function card(parsed, options = {}) {
|
|
|
45
48
|
const extractCard = (content) => {
|
|
46
49
|
if (!content) return null;
|
|
47
50
|
|
|
51
|
+
const links = content.links || [];
|
|
52
|
+
const buttonLink = links.find(l => l.role?.startsWith('button'));
|
|
53
|
+
const plainLink = links.find(l => !l.role?.startsWith('button'));
|
|
54
|
+
|
|
48
55
|
return {
|
|
49
56
|
title: content.title || null,
|
|
50
57
|
subtitle: content.subtitle || null,
|
|
@@ -52,8 +59,8 @@ function card(parsed, options = {}) {
|
|
|
52
59
|
image: first(content.imgs)?.url || null,
|
|
53
60
|
imageAlt: first(content.imgs)?.alt || null,
|
|
54
61
|
icon: first(content.icons) || null,
|
|
55
|
-
link:
|
|
56
|
-
|
|
62
|
+
link: plainLink || null,
|
|
63
|
+
cta: buttonLink || plainLink || null,
|
|
57
64
|
};
|
|
58
65
|
};
|
|
59
66
|
|
|
@@ -230,6 +237,8 @@ function pricing(parsed) {
|
|
|
230
237
|
return items
|
|
231
238
|
.map((item) => {
|
|
232
239
|
const firstList = first(item.lists);
|
|
240
|
+
const links = item.links || [];
|
|
241
|
+
const buttonLink = links.find(l => l.role?.startsWith('button'));
|
|
233
242
|
|
|
234
243
|
return {
|
|
235
244
|
name: item.title || null,
|
|
@@ -242,7 +251,7 @@ function pricing(parsed) {
|
|
|
242
251
|
)
|
|
243
252
|
.filter(Boolean)
|
|
244
253
|
: [],
|
|
245
|
-
cta:
|
|
254
|
+
cta: buttonLink || first(links) || null,
|
|
246
255
|
highlighted:
|
|
247
256
|
item.pretitle?.toLowerCase().includes("popular") || false,
|
|
248
257
|
};
|
|
@@ -314,6 +323,9 @@ function gallery(parsed, options = {}) {
|
|
|
314
323
|
* used by the legacy Article class, enabling drop-in replacement without
|
|
315
324
|
* breaking existing components.
|
|
316
325
|
*
|
|
326
|
+
* NOTE: Reconstructs deprecated fields (buttons, cards, documents, forms, alignment)
|
|
327
|
+
* from the new consolidated structure for backwards compatibility.
|
|
328
|
+
*
|
|
317
329
|
* @param {Object} parsed - Parsed content from parseContent() (flat structure)
|
|
318
330
|
* @returns {Object} Legacy format { main, items } with nested header/body structure
|
|
319
331
|
*
|
|
@@ -334,6 +346,20 @@ function legacy(parsed) {
|
|
|
334
346
|
|
|
335
347
|
if (!banner) banner = imgs[0];
|
|
336
348
|
|
|
349
|
+
// Reconstruct deprecated fields from new structure
|
|
350
|
+
const links = content.links || [];
|
|
351
|
+
const buttons = links
|
|
352
|
+
.filter(l => l.role?.startsWith('button'))
|
|
353
|
+
.map(l => ({ attrs: l, content: l.label }));
|
|
354
|
+
const documents = links
|
|
355
|
+
.filter(l => l.role === 'document')
|
|
356
|
+
.map(l => ({ title: l.label, href: l.href, coverImg: l.preview }));
|
|
357
|
+
const plainLinks = links.filter(l => !l.role?.startsWith('button') && l.role !== 'document');
|
|
358
|
+
|
|
359
|
+
const cards = content.data?.cards || [];
|
|
360
|
+
const form = content.data?.form || null;
|
|
361
|
+
const forms = form ? [form] : [];
|
|
362
|
+
|
|
337
363
|
return {
|
|
338
364
|
header: {
|
|
339
365
|
title: content.title || "",
|
|
@@ -345,7 +371,7 @@ function legacy(parsed) {
|
|
|
345
371
|
content.subtitle2 ||
|
|
346
372
|
first(content.paragraphs) ||
|
|
347
373
|
"",
|
|
348
|
-
alignment:
|
|
374
|
+
alignment: "", // Deprecated: always empty
|
|
349
375
|
},
|
|
350
376
|
banner,
|
|
351
377
|
body: {
|
|
@@ -354,16 +380,16 @@ function legacy(parsed) {
|
|
|
354
380
|
imgs,
|
|
355
381
|
videos: content.videos || [],
|
|
356
382
|
lists: content.lists || [],
|
|
357
|
-
links:
|
|
383
|
+
links: plainLinks,
|
|
358
384
|
icons: content.icons || [],
|
|
359
|
-
buttons
|
|
360
|
-
cards
|
|
361
|
-
documents
|
|
362
|
-
forms
|
|
363
|
-
form
|
|
385
|
+
buttons,
|
|
386
|
+
cards,
|
|
387
|
+
documents,
|
|
388
|
+
forms,
|
|
389
|
+
form,
|
|
364
390
|
quotes: content.quotes || [],
|
|
365
|
-
properties: content.
|
|
366
|
-
propertyBlocks:
|
|
391
|
+
properties: content.data || {},
|
|
392
|
+
propertyBlocks: [],
|
|
367
393
|
},
|
|
368
394
|
};
|
|
369
395
|
};
|
package/src/processors/groups.js
CHANGED
|
@@ -10,19 +10,13 @@ function flattenGroup(group) {
|
|
|
10
10
|
pretitle: group.header.pretitle || '',
|
|
11
11
|
subtitle: group.header.subtitle || '',
|
|
12
12
|
subtitle2: group.header.subtitle2 || '',
|
|
13
|
-
alignment: group.header.alignment || null,
|
|
14
13
|
paragraphs: group.body.paragraphs || [],
|
|
15
14
|
links: group.body.links || [],
|
|
16
15
|
imgs: group.body.imgs || [],
|
|
17
16
|
icons: group.body.icons || [],
|
|
18
17
|
lists: group.body.lists || [],
|
|
19
18
|
videos: group.body.videos || [],
|
|
20
|
-
|
|
21
|
-
properties: group.body.properties || {},
|
|
22
|
-
propertyBlocks: group.body.propertyBlocks || [],
|
|
23
|
-
cards: group.body.cards || [],
|
|
24
|
-
documents: group.body.documents || [],
|
|
25
|
-
forms: group.body.forms || [],
|
|
19
|
+
data: group.body.data || {},
|
|
26
20
|
quotes: group.body.quotes || [],
|
|
27
21
|
headings: group.body.headings || [],
|
|
28
22
|
};
|
|
@@ -42,19 +36,13 @@ function processGroups(sequence, options = {}) {
|
|
|
42
36
|
pretitle: '',
|
|
43
37
|
subtitle: '',
|
|
44
38
|
subtitle2: '',
|
|
45
|
-
alignment: null,
|
|
46
39
|
paragraphs: [],
|
|
47
40
|
links: [],
|
|
48
41
|
imgs: [],
|
|
49
42
|
icons: [],
|
|
50
43
|
lists: [],
|
|
51
44
|
videos: [],
|
|
52
|
-
|
|
53
|
-
properties: {},
|
|
54
|
-
propertyBlocks: [],
|
|
55
|
-
cards: [],
|
|
56
|
-
documents: [],
|
|
57
|
-
forms: [],
|
|
45
|
+
data: {},
|
|
58
46
|
quotes: [],
|
|
59
47
|
headings: [],
|
|
60
48
|
items: [],
|
|
@@ -84,19 +72,13 @@ function processGroups(sequence, options = {}) {
|
|
|
84
72
|
pretitle: '',
|
|
85
73
|
subtitle: '',
|
|
86
74
|
subtitle2: '',
|
|
87
|
-
alignment: null,
|
|
88
75
|
paragraphs: [],
|
|
89
76
|
links: [],
|
|
90
77
|
imgs: [],
|
|
91
78
|
icons: [],
|
|
92
79
|
lists: [],
|
|
93
80
|
videos: [],
|
|
94
|
-
|
|
95
|
-
properties: {},
|
|
96
|
-
propertyBlocks: [],
|
|
97
|
-
cards: [],
|
|
98
|
-
documents: [],
|
|
99
|
-
forms: [],
|
|
81
|
+
data: {},
|
|
100
82
|
quotes: [],
|
|
101
83
|
headings: [],
|
|
102
84
|
};
|
|
@@ -228,7 +210,6 @@ function processGroupContent(elements) {
|
|
|
228
210
|
title: "",
|
|
229
211
|
subtitle: "",
|
|
230
212
|
subtitle2: "",
|
|
231
|
-
alignment: null,
|
|
232
213
|
};
|
|
233
214
|
|
|
234
215
|
const body = {
|
|
@@ -238,12 +219,7 @@ function processGroupContent(elements) {
|
|
|
238
219
|
paragraphs: [],
|
|
239
220
|
links: [],
|
|
240
221
|
lists: [],
|
|
241
|
-
|
|
242
|
-
properties: {},
|
|
243
|
-
propertyBlocks: [],
|
|
244
|
-
cards: [],
|
|
245
|
-
documents: [],
|
|
246
|
-
forms: [],
|
|
222
|
+
data: {},
|
|
247
223
|
quotes: [],
|
|
248
224
|
headings: [],
|
|
249
225
|
};
|
|
@@ -276,10 +252,6 @@ function processGroupContent(elements) {
|
|
|
276
252
|
//We should set the group level to the highest one instead of the first one.
|
|
277
253
|
metadata.level ??= element.level;
|
|
278
254
|
|
|
279
|
-
// Extract alignment from first heading
|
|
280
|
-
if (!header.alignment && element.attrs?.textAlign) {
|
|
281
|
-
header.alignment = element.attrs.textAlign;
|
|
282
|
-
}
|
|
283
255
|
// h3 h2 h1 h1
|
|
284
256
|
// Assign to header fields
|
|
285
257
|
// h3 h2 h3 h4
|
|
@@ -333,9 +305,16 @@ function processGroupContent(elements) {
|
|
|
333
305
|
break;
|
|
334
306
|
|
|
335
307
|
case "button":
|
|
336
|
-
|
|
337
|
-
|
|
338
|
-
|
|
308
|
+
// Map button to link with role
|
|
309
|
+
body.links.push({
|
|
310
|
+
href: element.attrs?.href || '',
|
|
311
|
+
label: element.text || '',
|
|
312
|
+
role: element.attrs?.variant ? `button-${element.attrs.variant}` : 'button',
|
|
313
|
+
variant: element.attrs?.variant || 'primary',
|
|
314
|
+
size: element.attrs?.size,
|
|
315
|
+
icon: element.attrs?.icon,
|
|
316
|
+
target: element.attrs?.target,
|
|
317
|
+
class: element.attrs?.class,
|
|
339
318
|
});
|
|
340
319
|
break;
|
|
341
320
|
|
|
@@ -345,22 +324,49 @@ function processGroupContent(elements) {
|
|
|
345
324
|
body.quotes.push(quoteContent.body);
|
|
346
325
|
break;
|
|
347
326
|
|
|
327
|
+
case "dataBlock":
|
|
328
|
+
// Pre-parsed structured data from content-reader
|
|
329
|
+
body.data[element.tag] = element.data;
|
|
330
|
+
break;
|
|
331
|
+
|
|
348
332
|
case "codeBlock":
|
|
349
|
-
|
|
350
|
-
|
|
351
|
-
|
|
333
|
+
// Fallback: tagged code blocks where parsing failed at build time
|
|
334
|
+
// Untagged blocks stay in sequence for display
|
|
335
|
+
const tag = element.attrs?.tag;
|
|
336
|
+
if (tag) {
|
|
337
|
+
body.data[tag] = element.text;
|
|
338
|
+
}
|
|
352
339
|
break;
|
|
353
340
|
|
|
354
341
|
case "form":
|
|
355
|
-
|
|
342
|
+
// Map FormBlock to data.form
|
|
343
|
+
body.data.form = element.data || element.attrs;
|
|
356
344
|
break;
|
|
357
345
|
|
|
358
346
|
case "card-group":
|
|
359
|
-
|
|
347
|
+
// Map cards to data by type: data.person = [...], data.event = [...]
|
|
348
|
+
// Each card type becomes a key, with an array of cards of that type
|
|
349
|
+
(element.cards || []).forEach(card => {
|
|
350
|
+
const cardType = card.cardType || 'card';
|
|
351
|
+
if (!body.data[cardType]) body.data[cardType] = [];
|
|
352
|
+
// Remove cardType from the card object since it's now the key
|
|
353
|
+
const { cardType: _, ...cardData } = card;
|
|
354
|
+
body.data[cardType].push(cardData);
|
|
355
|
+
});
|
|
360
356
|
break;
|
|
361
357
|
|
|
362
358
|
case "document-group":
|
|
363
|
-
|
|
359
|
+
// Map documents to links with role=document
|
|
360
|
+
element.documents.forEach(doc => {
|
|
361
|
+
body.links.push({
|
|
362
|
+
href: doc.href || doc.downloadUrl || '',
|
|
363
|
+
label: doc.title || '',
|
|
364
|
+
role: 'document',
|
|
365
|
+
download: true,
|
|
366
|
+
preview: doc.coverImg,
|
|
367
|
+
fileType: doc.fileType,
|
|
368
|
+
});
|
|
369
|
+
});
|
|
364
370
|
break;
|
|
365
371
|
}
|
|
366
372
|
}
|
|
@@ -1,3 +1,52 @@
|
|
|
1
|
+
import { parse as parseYaml } from "yaml";
|
|
2
|
+
|
|
3
|
+
/**
 * Resolve the value carried by a code block.
 *
 * Content can come from two sources:
 * 1. Pre-parsed at build time: attrs.data already holds the parsed value.
 * 2. Legacy/runtime: the raw text is parsed here according to attrs.language.
 *
 * Untagged blocks are never interpreted and are returned as raw text.
 * Parsing is best-effort: when JSON/YAML parsing throws, the raw text is
 * returned unchanged instead of propagating the error.
 *
 * @param {string} text - Raw code block text
 * @param {Object} [attrs] - Code block attributes (language, tag, data)
 * @returns {*} Pre-parsed data, runtime-parsed data, or the raw text
 */
function getCodeBlockData(text, attrs) {
  const { language, tag, data } = attrs || {};

  // Only tagged blocks are treated as structured data.
  if (!tag) {
    return text;
  }

  // Prefer pre-parsed data from build time (attrs.data).
  // `null` is treated like `undefined`: an unset attribute commonly arrives
  // as null (e.g. after JSON serialization), and returning it would discard
  // the block's content instead of running the runtime fallback below.
  // Other falsy values (0, false, "") are legitimate parsed results.
  if (data !== undefined && data !== null) {
    return data;
  }

  // Fallback: parse text at runtime (for backwards compatibility).
  const lang = (language || "").toLowerCase();

  if (lang === "json") {
    try {
      return JSON.parse(text);
    } catch {
      // Deliberate best-effort: malformed JSON is surfaced as raw text.
      return text;
    }
  }

  if (lang === "yaml" || lang === "yml") {
    try {
      return parseYaml(text);
    } catch {
      // Deliberate best-effort: malformed YAML is surfaced as raw text.
      return text;
    }
  }

  // Unknown language - return raw text.
  return text;
}
|
|
49
|
+
|
|
1
50
|
/**
|
|
2
51
|
* Process a ProseMirror/TipTap document into a flat sequence
|
|
3
52
|
* @param {Object} doc ProseMirror document
|
|
@@ -79,20 +128,19 @@ function createSequenceElement(node, options = {}) {
|
|
|
79
128
|
attrs,
|
|
80
129
|
};
|
|
81
130
|
|
|
82
|
-
case "
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
} catch (err) {
|
|
90
|
-
parsed = textContent;
|
|
91
|
-
}
|
|
131
|
+
case "dataBlock":
|
|
132
|
+
// Pre-parsed structured data from content-reader
|
|
133
|
+
return {
|
|
134
|
+
type: "dataBlock",
|
|
135
|
+
data: attrs.data,
|
|
136
|
+
tag: attrs.tag,
|
|
137
|
+
};
|
|
92
138
|
|
|
139
|
+
case "codeBlock":
|
|
140
|
+
const codeText = getTextContent(content, options);
|
|
93
141
|
return {
|
|
94
142
|
type: "codeBlock",
|
|
95
|
-
text:
|
|
143
|
+
text: getCodeBlockData(codeText, attrs),
|
|
96
144
|
attrs,
|
|
97
145
|
};
|
|
98
146
|
|