@uniweb/semantic-parser 1.0.8 → 1.0.9
This diff shows the changes between two publicly released versions of this package, as they appear in the public registry they were published to. It is provided for informational purposes only.
- package/README.md +52 -104
- package/docs/api.md +38 -40
- package/docs/mapping-patterns.md +47 -47
- package/docs/text-component-reference.md +3 -3
- package/package.json +1 -1
- package/src/index.js +5 -7
- package/src/mappers/extractors.js +113 -120
- package/src/processors/groups.js +96 -25
- package/src/processors/byType.js +0 -130
package/README.md
CHANGED
|
@@ -4,11 +4,10 @@ A semantic parser for ProseMirror/TipTap content structures that helps bridge th
|
|
|
4
4
|
|
|
5
5
|
## What it Does
|
|
6
6
|
|
|
7
|
-
The parser transforms rich text editor content (ProseMirror/TipTap) into structured, semantic groups that web components can easily consume. It provides three complementary views of your content:
|
|
7
|
+
The parser transforms rich text editor content (ProseMirror/TipTap) into structured, semantic groups that web components can easily consume. It provides two complementary views of your content:
|
|
8
8
|
|
|
9
|
-
1. **Sequence**:
|
|
10
|
-
2. **Groups**: Content organized into semantic sections
|
|
11
|
-
3. **ByType**: Elements categorized by type for easy filtering and queries
|
|
9
|
+
1. **Sequence**: An ordered list of all content elements (for rendering in document order)
|
|
10
|
+
2. **Groups**: Content organized into semantic sections (main content + items)
|
|
12
11
|
|
|
13
12
|
## Installation
|
|
14
13
|
|
|
@@ -41,16 +40,16 @@ const doc = {
|
|
|
41
40
|
const result = parseContent(doc);
|
|
42
41
|
|
|
43
42
|
// Access different views
|
|
44
|
-
console.log(result.sequence); //
|
|
45
|
-
console.log(result.
|
|
46
|
-
console.log(result.
|
|
43
|
+
console.log(result.sequence); // Ordered array of elements
|
|
44
|
+
console.log(result.title); // Main content fields at top level
|
|
45
|
+
console.log(result.items); // Additional content groups
|
|
47
46
|
```
|
|
48
47
|
|
|
49
48
|
## Output Structure
|
|
50
49
|
|
|
51
50
|
### Sequence View
|
|
52
51
|
|
|
53
|
-
|
|
52
|
+
An ordered array of semantic elements preserving document order:
|
|
54
53
|
|
|
55
54
|
```js
|
|
56
55
|
result.sequence = [
|
|
@@ -59,72 +58,37 @@ result.sequence = [
|
|
|
59
58
|
]
|
|
60
59
|
```
|
|
61
60
|
|
|
62
|
-
###
|
|
61
|
+
### Content Structure
|
|
63
62
|
|
|
64
|
-
|
|
63
|
+
Main content fields are at the top level. The `items` array contains additional content groups (e.g., H3 sections), each with the same field structure:
|
|
65
64
|
|
|
66
65
|
```js
|
|
67
|
-
result
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
dividerMode: false, // Using dividers vs headings
|
|
88
|
-
groups: 0
|
|
89
|
-
}
|
|
90
|
-
}
|
|
91
|
-
```
|
|
92
|
-
|
|
93
|
-
### ByType View
|
|
66
|
+
result = {
|
|
67
|
+
// Main content fields
|
|
68
|
+
pretitle: "", // H3 before main title
|
|
69
|
+
title: "Welcome", // Main heading (H1)
|
|
70
|
+
subtitle: "", // H2 after main title
|
|
71
|
+
paragraphs: ["Get started today."],
|
|
72
|
+
imgs: [],
|
|
73
|
+
videos: [],
|
|
74
|
+
links: [],
|
|
75
|
+
lists: [],
|
|
76
|
+
icons: [],
|
|
77
|
+
buttons: [],
|
|
78
|
+
banner: null, // Optional banner image
|
|
79
|
+
// ... more content types
|
|
80
|
+
|
|
81
|
+
// Additional content groups (H3 sections)
|
|
82
|
+
items: [
|
|
83
|
+
{ title: "Feature 1", paragraphs: [...], links: [...] },
|
|
84
|
+
{ title: "Feature 2", paragraphs: [...], links: [...] }
|
|
85
|
+
],
|
|
94
86
|
|
|
95
|
-
|
|
87
|
+
// Ordered sequence for document-order rendering
|
|
88
|
+
sequence: [...],
|
|
96
89
|
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
headings: [
|
|
100
|
-
{
|
|
101
|
-
type: "heading",
|
|
102
|
-
level: 1,
|
|
103
|
-
content: "Welcome",
|
|
104
|
-
context: {
|
|
105
|
-
position: 0,
|
|
106
|
-
previousElement: null,
|
|
107
|
-
nextElement: { type: "paragraph", ... },
|
|
108
|
-
nearestHeading: null
|
|
109
|
-
}
|
|
110
|
-
}
|
|
111
|
-
],
|
|
112
|
-
paragraphs: [ /* ... */ ],
|
|
113
|
-
images: {
|
|
114
|
-
background: [],
|
|
115
|
-
content: [],
|
|
116
|
-
gallery: [],
|
|
117
|
-
icon: []
|
|
118
|
-
},
|
|
119
|
-
lists: [],
|
|
120
|
-
metadata: {
|
|
121
|
-
totalElements: 2,
|
|
122
|
-
dominantType: "paragraph",
|
|
123
|
-
hasMedia: false
|
|
124
|
-
},
|
|
125
|
-
// Helper methods
|
|
126
|
-
getHeadingsByLevel(level),
|
|
127
|
-
getElementsByHeadingContext(filter)
|
|
90
|
+
// Original document
|
|
91
|
+
raw: { type: "doc", content: [...] }
|
|
128
92
|
}
|
|
129
93
|
```
|
|
130
94
|
|
|
@@ -133,45 +97,29 @@ result.byType = {
|
|
|
133
97
|
### Extracting Main Content
|
|
134
98
|
|
|
135
99
|
```js
|
|
136
|
-
const
|
|
100
|
+
const content = parseContent(doc);
|
|
137
101
|
|
|
138
|
-
const title =
|
|
139
|
-
const description =
|
|
140
|
-
const image =
|
|
102
|
+
const title = content.title;
|
|
103
|
+
const description = content.paragraphs.join(" ");
|
|
104
|
+
const image = content.banner?.url;
|
|
141
105
|
```
|
|
142
106
|
|
|
143
107
|
### Processing Content Sections
|
|
144
108
|
|
|
145
109
|
```js
|
|
146
|
-
const
|
|
110
|
+
const content = parseContent(doc);
|
|
147
111
|
|
|
148
112
|
// Main content
|
|
149
|
-
console.log("
|
|
113
|
+
console.log("Title:", content.title);
|
|
114
|
+
console.log("Description:", content.paragraphs);
|
|
150
115
|
|
|
151
|
-
// Additional sections
|
|
152
|
-
|
|
153
|
-
console.log("Section:", item.
|
|
154
|
-
console.log("Content:", item.
|
|
116
|
+
// Additional sections (H3 groups)
|
|
117
|
+
content.items.forEach(item => {
|
|
118
|
+
console.log("Section:", item.title);
|
|
119
|
+
console.log("Content:", item.paragraphs);
|
|
155
120
|
});
|
|
156
121
|
```
|
|
157
122
|
|
|
158
|
-
### Finding Specific Elements
|
|
159
|
-
|
|
160
|
-
```js
|
|
161
|
-
const { byType } = parseContent(doc);
|
|
162
|
-
|
|
163
|
-
// Get all H2 headings
|
|
164
|
-
const subheadings = byType.getHeadingsByLevel(2);
|
|
165
|
-
|
|
166
|
-
// Get all background images
|
|
167
|
-
const backgrounds = byType.images.background;
|
|
168
|
-
|
|
169
|
-
// Get content under specific headings
|
|
170
|
-
const features = byType.getElementsByHeadingContext(
|
|
171
|
-
h => h.content.includes("Features")
|
|
172
|
-
);
|
|
173
|
-
```
|
|
174
|
-
|
|
175
123
|
### Sequential Processing
|
|
176
124
|
|
|
177
125
|
```js
|
|
@@ -203,17 +151,17 @@ Automatically transform content based on field types with context-aware behavior
|
|
|
203
151
|
```js
|
|
204
152
|
const schema = {
|
|
205
153
|
title: {
|
|
206
|
-
path: "
|
|
154
|
+
path: "title",
|
|
207
155
|
type: "plaintext", // Auto-strips <strong>, <em>, etc.
|
|
208
156
|
maxLength: 60 // Auto-truncates intelligently
|
|
209
157
|
},
|
|
210
158
|
excerpt: {
|
|
211
|
-
path: "
|
|
159
|
+
path: "paragraphs",
|
|
212
160
|
type: "excerpt", // Auto-creates excerpt from paragraphs
|
|
213
161
|
maxLength: 150
|
|
214
162
|
},
|
|
215
163
|
image: {
|
|
216
|
-
path: "
|
|
164
|
+
path: "imgs[0].url",
|
|
217
165
|
type: "image",
|
|
218
166
|
defaultValue: "/placeholder.jpg"
|
|
219
167
|
}
|
|
@@ -259,15 +207,15 @@ Define custom mappings using schemas:
|
|
|
259
207
|
|
|
260
208
|
```js
|
|
261
209
|
const schema = {
|
|
262
|
-
brand: "
|
|
263
|
-
title: "
|
|
264
|
-
subtitle: "
|
|
210
|
+
brand: "pretitle",
|
|
211
|
+
title: "title",
|
|
212
|
+
subtitle: "subtitle",
|
|
265
213
|
image: {
|
|
266
|
-
path: "
|
|
214
|
+
path: "imgs[0].url",
|
|
267
215
|
defaultValue: "/placeholder.jpg"
|
|
268
216
|
},
|
|
269
217
|
actions: {
|
|
270
|
-
path: "
|
|
218
|
+
path: "links",
|
|
271
219
|
transform: links => links.map(l => ({ label: l.label, type: "primary" }))
|
|
272
220
|
}
|
|
273
221
|
};
|
package/docs/api.md
CHANGED
|
@@ -118,51 +118,49 @@ A flat array of semantic elements extracted from the document tree.
|
|
|
118
118
|
|
|
119
119
|
### `groups`
|
|
120
120
|
|
|
121
|
-
Content organized into semantic groups with identified main content and items.
|
|
121
|
+
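Content organized into semantic groups with identified main content and items. Each group object is flat: its header fields (`pretitle`, `title`, `subtitle`) and body fields (`paragraphs`, `imgs`, `links`, ...) sit directly on the group rather than under nested `header`/`body` objects.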
|
|
122
122
|
|
|
123
123
|
```js
|
|
124
124
|
{
|
|
125
125
|
main: {
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
|
|
126
|
+
// Header fields (flat)
|
|
127
|
+
pretitle: "PRETITLE TEXT", // H3 before main title
|
|
128
|
+
title: "Main Title", // First heading in group
|
|
129
|
+
subtitle: "Subtitle", // Second heading in group
|
|
130
|
+
|
|
131
|
+
// Body fields (flat)
|
|
132
|
+
paragraphs: ["paragraph text", ...],
|
|
133
|
+
imgs: [
|
|
134
|
+
{ url: "...", caption: "...", alt: "..." }
|
|
135
|
+
],
|
|
136
|
+
icons: ["<svg>...</svg>", ...],
|
|
137
|
+
videos: [
|
|
138
|
+
{ src: "...", caption: "...", alt: "..." }
|
|
139
|
+
],
|
|
140
|
+
links: [
|
|
141
|
+
{ href: "...", label: "..." }
|
|
142
|
+
],
|
|
143
|
+
lists: [
|
|
144
|
+
[/* processed list items */]
|
|
145
|
+
],
|
|
146
|
+
buttons: [
|
|
147
|
+
{ content: "...", attrs: {...} }
|
|
148
|
+
],
|
|
149
|
+
properties: [], // Code block content
|
|
150
|
+
propertyBlocks: [], // Array of code blocks
|
|
151
|
+
cards: [], // Not yet implemented
|
|
152
|
+
headings: [], // Used in list items
|
|
153
|
+
|
|
154
|
+
// Banner (flat)
|
|
154
155
|
banner: {
|
|
155
156
|
url: "path/to/banner.jpg",
|
|
156
157
|
caption: "Banner caption",
|
|
157
158
|
alt: "Banner alt text"
|
|
158
|
-
} | null
|
|
159
|
-
metadata: {
|
|
160
|
-
level: 1, // Heading level that started this group
|
|
161
|
-
contentTypes: {} // Set of content types in group
|
|
162
|
-
}
|
|
159
|
+
} | null
|
|
163
160
|
},
|
|
164
161
|
items: [
|
|
165
|
-
// Array of groups with same structure as main
|
|
162
|
+
// Array of groups with same flat structure as main
|
|
163
|
+
// { title, pretitle, subtitle, paragraphs, imgs, ... }
|
|
166
164
|
],
|
|
167
165
|
metadata: {
|
|
168
166
|
dividerMode: false, // Whether dividers were used for grouping
|
|
@@ -268,14 +266,14 @@ const result = parseContent(doc);
|
|
|
268
266
|
```js
|
|
269
267
|
const { groups } = parseContent(doc);
|
|
270
268
|
|
|
271
|
-
// Access main content
|
|
272
|
-
console.log(groups.main.
|
|
273
|
-
console.log(groups.main.
|
|
269
|
+
// Access main content (flat structure)
|
|
270
|
+
console.log(groups.main.title);
|
|
271
|
+
console.log(groups.main.paragraphs);
|
|
274
272
|
|
|
275
273
|
// Iterate through content items
|
|
276
274
|
groups.items.forEach(item => {
|
|
277
|
-
console.log(item.
|
|
278
|
-
console.log(item.
|
|
275
|
+
console.log(item.title);
|
|
276
|
+
console.log(item.paragraphs);
|
|
279
277
|
});
|
|
280
278
|
```
|
|
281
279
|
|
package/docs/mapping-patterns.md
CHANGED
|
@@ -27,17 +27,17 @@ Gracefully handles content issues with silent, automatic cleanup:
|
|
|
27
27
|
```js
|
|
28
28
|
const schema = {
|
|
29
29
|
title: {
|
|
30
|
-
path: "groups.main.
|
|
30
|
+
path: "groups.main.title",
|
|
31
31
|
type: "plaintext", // Auto-strips HTML markup
|
|
32
32
|
maxLength: 60 // Auto-truncates with smart boundaries
|
|
33
33
|
},
|
|
34
34
|
description: {
|
|
35
|
-
path: "groups.main.
|
|
35
|
+
path: "groups.main.paragraphs",
|
|
36
36
|
type: "excerpt", // Auto-creates excerpt from paragraphs
|
|
37
37
|
maxLength: 150
|
|
38
38
|
},
|
|
39
39
|
image: {
|
|
40
|
-
path: "groups.main.
|
|
40
|
+
path: "groups.main.imgs[0].url",
|
|
41
41
|
type: "image", // Normalizes image data
|
|
42
42
|
defaultValue: "/placeholder.jpg",
|
|
43
43
|
treatEmptyAsDefault: true
|
|
@@ -74,7 +74,7 @@ Strips all HTML markup, returning clean text. Perfect for titles, labels, and an
|
|
|
74
74
|
```js
|
|
75
75
|
{
|
|
76
76
|
title: {
|
|
77
|
-
path: "groups.main.
|
|
77
|
+
path: "groups.main.title",
|
|
78
78
|
type: "plaintext",
|
|
79
79
|
maxLength: 60, // Auto-truncate
|
|
80
80
|
boundary: "word", // or "sentence", "character"
|
|
@@ -94,7 +94,7 @@ Preserves safe HTML while removing dangerous tags (script, iframe, etc.).
|
|
|
94
94
|
```js
|
|
95
95
|
{
|
|
96
96
|
description: {
|
|
97
|
-
path: "groups.main.
|
|
97
|
+
path: "groups.main.paragraphs[0]",
|
|
98
98
|
type: "richtext",
|
|
99
99
|
allowedTags: ["strong", "em", "a", "br"], // Customize allowed tags
|
|
100
100
|
stripTags: ["script", "style"] // Additional tags to remove
|
|
@@ -112,7 +112,7 @@ Auto-generates excerpt from content, stripping markup and truncating intelligent
|
|
|
112
112
|
```js
|
|
113
113
|
{
|
|
114
114
|
excerpt: {
|
|
115
|
-
path: "groups.main.
|
|
115
|
+
path: "groups.main.paragraphs",
|
|
116
116
|
type: "excerpt",
|
|
117
117
|
maxLength: 150,
|
|
118
118
|
boundary: "word", // or "sentence"
|
|
@@ -131,7 +131,7 @@ Parses and optionally formats numbers.
|
|
|
131
131
|
```js
|
|
132
132
|
{
|
|
133
133
|
price: {
|
|
134
|
-
path: "groups.main.
|
|
134
|
+
path: "groups.main.title",
|
|
135
135
|
type: "number",
|
|
136
136
|
format: {
|
|
137
137
|
decimals: 2,
|
|
@@ -152,7 +152,7 @@ Normalizes image data structure.
|
|
|
152
152
|
```js
|
|
153
153
|
{
|
|
154
154
|
image: {
|
|
155
|
-
path: "groups.main.
|
|
155
|
+
path: "groups.main.imgs[0]",
|
|
156
156
|
type: "image",
|
|
157
157
|
defaultValue: "/placeholder.jpg",
|
|
158
158
|
defaultAlt: "Image"
|
|
@@ -170,7 +170,7 @@ Normalizes link data structure.
|
|
|
170
170
|
```js
|
|
171
171
|
{
|
|
172
172
|
cta: {
|
|
173
|
-
path: "groups.main.
|
|
173
|
+
path: "groups.main.links[0]",
|
|
174
174
|
type: "link"
|
|
175
175
|
}
|
|
176
176
|
}
|
|
@@ -212,34 +212,34 @@ const hints = mappers.validateSchema(parsed, schema, { mode: 'visual-editor' });
|
|
|
212
212
|
// Component declares its content requirements
|
|
213
213
|
const componentSchema = {
|
|
214
214
|
brand: {
|
|
215
|
-
path: "groups.main.
|
|
215
|
+
path: "groups.main.pretitle",
|
|
216
216
|
type: "plaintext",
|
|
217
217
|
maxLength: 20,
|
|
218
218
|
transform: (text) => text.toUpperCase()
|
|
219
219
|
},
|
|
220
220
|
title: {
|
|
221
|
-
path: "groups.main.
|
|
221
|
+
path: "groups.main.title",
|
|
222
222
|
type: "plaintext",
|
|
223
223
|
maxLength: 60,
|
|
224
224
|
required: true
|
|
225
225
|
},
|
|
226
226
|
subtitle: {
|
|
227
|
-
path: "groups.main.
|
|
227
|
+
path: "groups.main.subtitle",
|
|
228
228
|
type: "plaintext",
|
|
229
229
|
maxLength: 100
|
|
230
230
|
},
|
|
231
231
|
description: {
|
|
232
|
-
path: "groups.main.
|
|
232
|
+
path: "groups.main.paragraphs",
|
|
233
233
|
type: "excerpt",
|
|
234
234
|
maxLength: 200
|
|
235
235
|
},
|
|
236
236
|
image: {
|
|
237
|
-
path: "groups.main.
|
|
237
|
+
path: "groups.main.imgs[0].url",
|
|
238
238
|
type: "image",
|
|
239
239
|
defaultValue: "/placeholder.jpg"
|
|
240
240
|
},
|
|
241
241
|
cta: {
|
|
242
|
-
path: "groups.main.
|
|
242
|
+
path: "groups.main.links[0]",
|
|
243
243
|
type: "link"
|
|
244
244
|
}
|
|
245
245
|
};
|
|
@@ -272,8 +272,8 @@ const heroData = mappers.extractors.hero(parsed);
|
|
|
272
272
|
|
|
273
273
|
// Or use schema-based extraction
|
|
274
274
|
const customData = mappers.extractBySchema(parsed, {
|
|
275
|
-
title: "groups.main.
|
|
276
|
-
image: { path: "groups.main.
|
|
275
|
+
title: "groups.main.title",
|
|
276
|
+
image: { path: "groups.main.imgs[0].url", defaultValue: "/placeholder.jpg" }
|
|
277
277
|
});
|
|
278
278
|
```
|
|
279
279
|
|
|
@@ -291,7 +291,7 @@ const image = helpers.first(images, "/default.jpg");
|
|
|
291
291
|
const lastParagraph = helpers.last(paragraphs);
|
|
292
292
|
|
|
293
293
|
// Transform array
|
|
294
|
-
const titles = helpers.transformArray(items, item => item.
|
|
294
|
+
const titles = helpers.transformArray(items, item => item.title);
|
|
295
295
|
|
|
296
296
|
// Filter and transform
|
|
297
297
|
const h2s = helpers.filterArray(headings, h => h.level === 2, h => h.content);
|
|
@@ -308,7 +308,7 @@ const cleanArray = helpers.compact([null, "text", "", undefined, "more"]);
|
|
|
308
308
|
|
|
309
309
|
```js
|
|
310
310
|
// Get nested value safely
|
|
311
|
-
const title = helpers.get(parsed, "groups.main.
|
|
311
|
+
const title = helpers.get(parsed, "groups.main.title", "Untitled");
|
|
312
312
|
|
|
313
313
|
// Pick specific properties
|
|
314
314
|
const metadata = helpers.pick(parsed.groups.main, ["header", "banner"]);
|
|
@@ -337,7 +337,7 @@ if (!validation.valid) {
|
|
|
337
337
|
```js
|
|
338
338
|
// Wrap extraction in try-catch
|
|
339
339
|
const safeExtractor = helpers.safe((parsed) => {
|
|
340
|
-
return parsed.groups.main.
|
|
340
|
+
return parsed.groups.main.title.toUpperCase();
|
|
341
341
|
}, "DEFAULT");
|
|
342
342
|
|
|
343
343
|
const title = safeExtractor(parsed); // Won't throw if path is invalid
|
|
@@ -350,24 +350,24 @@ const title = safeExtractor(parsed); // Won't throw if path is invalid
|
|
|
350
350
|
```js
|
|
351
351
|
const { accessor } = mappers;
|
|
352
352
|
|
|
353
|
-
// Simple path
|
|
354
|
-
const title = accessor.getByPath(parsed, "groups.main.
|
|
353
|
+
// Simple path (flat structure)
|
|
354
|
+
const title = accessor.getByPath(parsed, "groups.main.title");
|
|
355
355
|
|
|
356
356
|
// Array index notation
|
|
357
|
-
const firstImage = accessor.getByPath(parsed, "groups.main.
|
|
357
|
+
const firstImage = accessor.getByPath(parsed, "groups.main.imgs[0].url");
|
|
358
358
|
|
|
359
359
|
// With default value
|
|
360
|
-
const image = accessor.getByPath(parsed, "groups.main.
|
|
360
|
+
const image = accessor.getByPath(parsed, "groups.main.imgs[0].url", {
|
|
361
361
|
defaultValue: "/placeholder.jpg"
|
|
362
362
|
});
|
|
363
363
|
|
|
364
364
|
// With transformation
|
|
365
|
-
const description = accessor.getByPath(parsed, "groups.main.
|
|
365
|
+
const description = accessor.getByPath(parsed, "groups.main.paragraphs", {
|
|
366
366
|
transform: (paragraphs) => paragraphs.join(" ")
|
|
367
367
|
});
|
|
368
368
|
|
|
369
369
|
// Required field (throws if missing)
|
|
370
|
-
const title = accessor.getByPath(parsed, "groups.main.
|
|
370
|
+
const title = accessor.getByPath(parsed, "groups.main.title", {
|
|
371
371
|
required: true
|
|
372
372
|
});
|
|
373
373
|
```
|
|
@@ -378,22 +378,22 @@ Extract multiple fields at once using a schema:
|
|
|
378
378
|
|
|
379
379
|
```js
|
|
380
380
|
const schema = {
|
|
381
|
-
// Shorthand: just the path
|
|
382
|
-
title: "groups.main.
|
|
381
|
+
// Shorthand: just the path (flat structure)
|
|
382
|
+
title: "groups.main.title",
|
|
383
383
|
|
|
384
384
|
// Full config with options
|
|
385
385
|
image: {
|
|
386
|
-
path: "groups.main.
|
|
386
|
+
path: "groups.main.imgs[0].url",
|
|
387
387
|
defaultValue: "/placeholder.jpg"
|
|
388
388
|
},
|
|
389
389
|
|
|
390
390
|
description: {
|
|
391
|
-
path: "groups.main.
|
|
391
|
+
path: "groups.main.paragraphs",
|
|
392
392
|
transform: (p) => p.join(" ")
|
|
393
393
|
},
|
|
394
394
|
|
|
395
395
|
cta: {
|
|
396
|
-
path: "groups.main.
|
|
396
|
+
path: "groups.main.links[0]",
|
|
397
397
|
required: false
|
|
398
398
|
}
|
|
399
399
|
};
|
|
@@ -412,15 +412,15 @@ const data = accessor.extractBySchema(parsed, schema);
|
|
|
412
412
|
Extract data from array of items:
|
|
413
413
|
|
|
414
414
|
```js
|
|
415
|
-
// Simple: extract single field from each item
|
|
416
|
-
const titles = accessor.mapArray(parsed, "groups.items", "
|
|
415
|
+
// Simple: extract single field from each item (flat structure)
|
|
416
|
+
const titles = accessor.mapArray(parsed, "groups.items", "title");
|
|
417
417
|
// ["Item 1", "Item 2", "Item 3"]
|
|
418
418
|
|
|
419
419
|
// Complex: extract multiple fields from each item
|
|
420
420
|
const cards = accessor.mapArray(parsed, "groups.items", {
|
|
421
|
-
title: "
|
|
422
|
-
text: { path: "
|
|
423
|
-
image: { path: "
|
|
421
|
+
title: "title",
|
|
422
|
+
text: { path: "paragraphs", transform: p => p.join(" ") },
|
|
423
|
+
image: { path: "imgs[0].url", defaultValue: "/default.jpg" }
|
|
424
424
|
});
|
|
425
425
|
// [
|
|
426
426
|
// { title: "...", text: "...", image: "..." },
|
|
@@ -436,11 +436,11 @@ if (accessor.hasPath(parsed, "groups.main.banner.url")) {
|
|
|
436
436
|
// Banner exists
|
|
437
437
|
}
|
|
438
438
|
|
|
439
|
-
// Get first existing path
|
|
439
|
+
// Get first existing path (flat structure)
|
|
440
440
|
const image = accessor.getFirstExisting(parsed, [
|
|
441
441
|
"groups.main.banner.url",
|
|
442
|
-
"groups.main.
|
|
443
|
-
"groups.items[0].
|
|
442
|
+
"groups.main.imgs[0].url",
|
|
443
|
+
"groups.items[0].imgs[0].url"
|
|
444
444
|
], "/fallback.jpg");
|
|
445
445
|
```
|
|
446
446
|
|
|
@@ -640,8 +640,8 @@ const enhancedData = {
|
|
|
640
640
|
relatedPosts: helpers.transformArray(
|
|
641
641
|
accessor.getByPath(parsed, "groups.items", { defaultValue: [] }),
|
|
642
642
|
item => ({
|
|
643
|
-
title: item.
|
|
644
|
-
link: helpers.first(item.
|
|
643
|
+
title: item.title,
|
|
644
|
+
link: helpers.first(item.links)
|
|
645
645
|
})
|
|
646
646
|
),
|
|
647
647
|
|
|
@@ -662,13 +662,13 @@ const componentSchema = {
|
|
|
662
662
|
content: {
|
|
663
663
|
type: "hero", // Use pre-built extractor
|
|
664
664
|
// OR
|
|
665
|
-
mapping: { // Use custom mapping
|
|
666
|
-
brand: "groups.main.
|
|
667
|
-
title: "groups.main.
|
|
668
|
-
subtitle: "groups.main.
|
|
669
|
-
image: { path: "groups.main.
|
|
665
|
+
mapping: { // Use custom mapping (flat paths)
|
|
666
|
+
brand: "groups.main.pretitle",
|
|
667
|
+
title: "groups.main.title",
|
|
668
|
+
subtitle: "groups.main.subtitle",
|
|
669
|
+
image: { path: "groups.main.imgs[0].url", defaultValue: "/default.jpg" },
|
|
670
670
|
actions: {
|
|
671
|
-
path: "groups.main.
|
|
671
|
+
path: "groups.main.links",
|
|
672
672
|
transform: links => links.map(l => ({ label: l.label, type: "primary" }))
|
|
673
673
|
}
|
|
674
674
|
}
|
|
@@ -274,9 +274,9 @@ function Card({ data }) {
|
|
|
274
274
|
import { getByPath, extractBySchema } from '@uniweb/semantic-parser/mappers/accessor';
|
|
275
275
|
|
|
276
276
|
const schema = {
|
|
277
|
-
title: { path: 'groups.main.
|
|
278
|
-
subtitle: { path: 'groups.main.
|
|
279
|
-
content: { path: 'groups.main.
|
|
277
|
+
title: { path: 'groups.main.title' },
|
|
278
|
+
subtitle: { path: 'groups.main.subtitle' },
|
|
279
|
+
content: { path: 'groups.main.paragraphs' }
|
|
280
280
|
};
|
|
281
281
|
|
|
282
282
|
const data = extractBySchema(parsed, schema);
|
package/package.json
CHANGED
package/src/index.js
CHANGED
|
@@ -1,6 +1,5 @@
|
|
|
1
1
|
import { processSequence } from "./processors/sequence.js";
|
|
2
2
|
import { processGroups } from "./processors/groups.js";
|
|
3
|
-
import { processByType } from "./processors/byType.js";
|
|
4
3
|
import * as mappers from "./mappers/index.js";
|
|
5
4
|
|
|
6
5
|
/**
|
|
@@ -8,7 +7,7 @@ import * as mappers from "./mappers/index.js";
|
|
|
8
7
|
* @param {Object} doc - ProseMirror document
|
|
9
8
|
* @param {Object} options - Parsing options
|
|
10
9
|
* @param {boolean} options.parseCodeAsJson - Parse code blocks as JSON. Default: false
|
|
11
|
-
* @returns {Object}
|
|
10
|
+
* @returns {Object} Flat content structure with sequence for ordered access
|
|
12
11
|
*/
|
|
13
12
|
function parseContent(doc, options = {}) {
|
|
14
13
|
// Default options
|
|
@@ -17,18 +16,17 @@ function parseContent(doc, options = {}) {
|
|
|
17
16
|
...options,
|
|
18
17
|
};
|
|
19
18
|
|
|
20
|
-
// Process
|
|
19
|
+
// Process sequence (ordered elements)
|
|
21
20
|
const sequence = processSequence(doc, opts);
|
|
22
21
|
|
|
22
|
+
// Process groups (semantic structure) - returns flat object
|
|
23
23
|
const groups = processGroups(sequence, opts);
|
|
24
24
|
|
|
25
|
-
|
|
26
|
-
|
|
25
|
+
// Return flat structure with sequence at top level
|
|
27
26
|
return {
|
|
28
27
|
raw: doc,
|
|
29
28
|
sequence,
|
|
30
|
-
groups,
|
|
31
|
-
byType,
|
|
29
|
+
...groups, // Spread flat content: title, paragraphs, items, etc.
|
|
32
30
|
};
|
|
33
31
|
}
|
|
34
32
|
|
|
@@ -1,5 +1,9 @@
|
|
|
1
1
|
/**
|
|
2
2
|
* Pre-built extractors for common component patterns
|
|
3
|
+
*
|
|
4
|
+
* All extractors work with the flat content structure:
|
|
5
|
+
* - Root level: title, pretitle, subtitle, paragraphs, links, imgs, items, etc.
|
|
6
|
+
* - Items array: each item has flat structure (title, paragraphs, etc.)
|
|
3
7
|
*/
|
|
4
8
|
|
|
5
9
|
import { first, joinParagraphs } from "./helpers.js";
|
|
@@ -12,18 +16,16 @@ import { first, joinParagraphs } from "./helpers.js";
|
|
|
12
16
|
* @returns {Object} Hero component data
|
|
13
17
|
*/
|
|
14
18
|
function hero(parsed) {
|
|
15
|
-
const main = parsed.groups?.main;
|
|
16
|
-
|
|
17
19
|
return {
|
|
18
|
-
title:
|
|
19
|
-
subtitle:
|
|
20
|
-
kicker:
|
|
21
|
-
description:
|
|
22
|
-
image: first(
|
|
23
|
-
imageAlt: first(
|
|
24
|
-
banner:
|
|
25
|
-
cta: first(
|
|
26
|
-
button: first(
|
|
20
|
+
title: parsed?.title || null,
|
|
21
|
+
subtitle: parsed?.subtitle || null,
|
|
22
|
+
kicker: parsed?.pretitle || null,
|
|
23
|
+
description: parsed?.paragraphs || [],
|
|
24
|
+
image: first(parsed?.imgs)?.url || null,
|
|
25
|
+
imageAlt: first(parsed?.imgs)?.alt || null,
|
|
26
|
+
banner: null, // Banner detection would need to be added separately
|
|
27
|
+
cta: first(parsed?.links) || null,
|
|
28
|
+
button: first(parsed?.buttons) || null,
|
|
27
29
|
};
|
|
28
30
|
}
|
|
29
31
|
|
|
@@ -40,30 +42,30 @@ function hero(parsed) {
|
|
|
40
42
|
function card(parsed, options = {}) {
|
|
41
43
|
const { useItems = false, itemIndex } = options;
|
|
42
44
|
|
|
43
|
-
const extractCard = (
|
|
44
|
-
if (!
|
|
45
|
+
const extractCard = (content) => {
|
|
46
|
+
if (!content) return null;
|
|
45
47
|
|
|
46
48
|
return {
|
|
47
|
-
title:
|
|
48
|
-
subtitle:
|
|
49
|
-
description:
|
|
50
|
-
image: first(
|
|
51
|
-
imageAlt: first(
|
|
52
|
-
icon: first(
|
|
53
|
-
link: first(
|
|
54
|
-
button: first(
|
|
49
|
+
title: content.title || null,
|
|
50
|
+
subtitle: content.subtitle || null,
|
|
51
|
+
description: content.paragraphs || [],
|
|
52
|
+
image: first(content.imgs)?.url || null,
|
|
53
|
+
imageAlt: first(content.imgs)?.alt || null,
|
|
54
|
+
icon: first(content.icons) || null,
|
|
55
|
+
link: first(content.links) || null,
|
|
56
|
+
button: first(content.buttons) || null,
|
|
55
57
|
};
|
|
56
58
|
};
|
|
57
59
|
|
|
58
60
|
if (useItems) {
|
|
59
|
-
const items = parsed
|
|
61
|
+
const items = parsed?.items || [];
|
|
60
62
|
if (itemIndex !== undefined) {
|
|
61
63
|
return extractCard(items[itemIndex]);
|
|
62
64
|
}
|
|
63
65
|
return items.map(extractCard).filter(Boolean);
|
|
64
66
|
}
|
|
65
67
|
|
|
66
|
-
return extractCard(parsed
|
|
68
|
+
return extractCard(parsed);
|
|
67
69
|
}
|
|
68
70
|
|
|
69
71
|
/**
|
|
@@ -74,19 +76,17 @@ function card(parsed, options = {}) {
|
|
|
74
76
|
* @returns {Object} Article data
|
|
75
77
|
*/
|
|
76
78
|
function article(parsed) {
|
|
77
|
-
const main = parsed.groups?.main;
|
|
78
|
-
|
|
79
79
|
return {
|
|
80
|
-
title:
|
|
81
|
-
subtitle:
|
|
82
|
-
kicker:
|
|
83
|
-
author:
|
|
84
|
-
date:
|
|
85
|
-
banner:
|
|
86
|
-
content:
|
|
87
|
-
images:
|
|
88
|
-
videos:
|
|
89
|
-
links:
|
|
80
|
+
title: parsed?.title || null,
|
|
81
|
+
subtitle: parsed?.subtitle || null,
|
|
82
|
+
kicker: parsed?.pretitle || null,
|
|
83
|
+
author: null, // Would need metadata support
|
|
84
|
+
date: null, // Would need metadata support
|
|
85
|
+
banner: null, // Banner detection would need to be added separately
|
|
86
|
+
content: parsed?.paragraphs || [],
|
|
87
|
+
images: parsed?.imgs || [],
|
|
88
|
+
videos: parsed?.videos || [],
|
|
89
|
+
links: parsed?.links || [],
|
|
90
90
|
};
|
|
91
91
|
}
|
|
92
92
|
|
|
@@ -98,14 +98,13 @@ function article(parsed) {
|
|
|
98
98
|
* @returns {Array} Array of stat objects
|
|
99
99
|
*/
|
|
100
100
|
function stats(parsed) {
|
|
101
|
-
const items = parsed
|
|
101
|
+
const items = parsed?.items || [];
|
|
102
102
|
|
|
103
103
|
return items
|
|
104
104
|
.map((item) => ({
|
|
105
|
-
value: item.
|
|
106
|
-
label:
|
|
107
|
-
|
|
108
|
-
description: item.body?.paragraphs || [],
|
|
105
|
+
value: item.title || null,
|
|
106
|
+
label: item.subtitle || first(item.paragraphs) || null,
|
|
107
|
+
description: item.paragraphs || [],
|
|
109
108
|
}))
|
|
110
109
|
.filter((stat) => stat.value);
|
|
111
110
|
}
|
|
@@ -118,17 +117,17 @@ function stats(parsed) {
|
|
|
118
117
|
* @returns {Array} Navigation items
|
|
119
118
|
*/
|
|
120
119
|
function navigation(parsed) {
|
|
121
|
-
const items = parsed
|
|
120
|
+
const items = parsed?.items || [];
|
|
122
121
|
|
|
123
122
|
return items
|
|
124
123
|
.map((item) => {
|
|
125
124
|
const navItem = {
|
|
126
|
-
label: item.
|
|
127
|
-
href: first(item.
|
|
125
|
+
label: item.title || null,
|
|
126
|
+
href: first(item.links)?.href || null,
|
|
128
127
|
};
|
|
129
128
|
|
|
130
129
|
// Extract children from nested lists
|
|
131
|
-
const firstList = first(item.
|
|
130
|
+
const firstList = first(item.lists);
|
|
132
131
|
if (firstList && firstList.length > 0) {
|
|
133
132
|
navItem.children = firstList
|
|
134
133
|
.map((listItem) => ({
|
|
@@ -152,16 +151,16 @@ function navigation(parsed) {
|
|
|
152
151
|
* @returns {Array} Feature items
|
|
153
152
|
*/
|
|
154
153
|
function features(parsed) {
|
|
155
|
-
const items = parsed
|
|
154
|
+
const items = parsed?.items || [];
|
|
156
155
|
|
|
157
156
|
return items
|
|
158
157
|
.map((item) => ({
|
|
159
|
-
title: item.
|
|
160
|
-
subtitle: item.
|
|
161
|
-
description: item.
|
|
162
|
-
icon: first(item.
|
|
163
|
-
image: first(item.
|
|
164
|
-
link: first(item.
|
|
158
|
+
title: item.title || null,
|
|
159
|
+
subtitle: item.subtitle || null,
|
|
160
|
+
description: item.paragraphs || [],
|
|
161
|
+
icon: first(item.icons) || null,
|
|
162
|
+
image: first(item.imgs)?.url || null,
|
|
163
|
+
link: first(item.links) || null,
|
|
165
164
|
}))
|
|
166
165
|
.filter((feature) => feature.title);
|
|
167
166
|
}
|
|
@@ -178,25 +177,25 @@ function features(parsed) {
|
|
|
178
177
|
function testimonial(parsed, options = {}) {
|
|
179
178
|
const { useItems = false } = options;
|
|
180
179
|
|
|
181
|
-
const extractTestimonial = (
|
|
182
|
-
if (!
|
|
180
|
+
const extractTestimonial = (content) => {
|
|
181
|
+
if (!content) return null;
|
|
183
182
|
|
|
184
183
|
return {
|
|
185
|
-
quote:
|
|
186
|
-
author:
|
|
187
|
-
role:
|
|
188
|
-
company:
|
|
189
|
-
image: first(
|
|
190
|
-
imageAlt: first(
|
|
184
|
+
quote: content.paragraphs || [],
|
|
185
|
+
author: content.title || null,
|
|
186
|
+
role: content.subtitle || null,
|
|
187
|
+
company: content.pretitle || null,
|
|
188
|
+
image: first(content.imgs)?.url || null,
|
|
189
|
+
imageAlt: first(content.imgs)?.alt || null,
|
|
191
190
|
};
|
|
192
191
|
};
|
|
193
192
|
|
|
194
193
|
if (useItems) {
|
|
195
|
-
const items = parsed
|
|
194
|
+
const items = parsed?.items || [];
|
|
196
195
|
return items.map(extractTestimonial).filter(Boolean);
|
|
197
196
|
}
|
|
198
197
|
|
|
199
|
-
return extractTestimonial(parsed
|
|
198
|
+
return extractTestimonial(parsed);
|
|
200
199
|
}
|
|
201
200
|
|
|
202
201
|
/**
|
|
@@ -207,13 +206,13 @@ function testimonial(parsed, options = {}) {
|
|
|
207
206
|
* @returns {Array} FAQ items
|
|
208
207
|
*/
|
|
209
208
|
function faq(parsed) {
|
|
210
|
-
const items = parsed
|
|
209
|
+
const items = parsed?.items || [];
|
|
211
210
|
|
|
212
211
|
return items
|
|
213
212
|
.map((item) => ({
|
|
214
|
-
question: item.
|
|
215
|
-
answer: item.
|
|
216
|
-
links: item.
|
|
213
|
+
question: item.title || null,
|
|
214
|
+
answer: item.paragraphs || [],
|
|
215
|
+
links: item.links || [],
|
|
217
216
|
}))
|
|
218
217
|
.filter((item) => item.question);
|
|
219
218
|
}
|
|
@@ -226,16 +225,16 @@ function faq(parsed) {
|
|
|
226
225
|
* @returns {Array} Pricing tiers
|
|
227
226
|
*/
|
|
228
227
|
function pricing(parsed) {
|
|
229
|
-
const items = parsed
|
|
228
|
+
const items = parsed?.items || [];
|
|
230
229
|
|
|
231
230
|
return items
|
|
232
231
|
.map((item) => {
|
|
233
|
-
const firstList = first(item.
|
|
232
|
+
const firstList = first(item.lists);
|
|
234
233
|
|
|
235
234
|
return {
|
|
236
|
-
name: item.
|
|
237
|
-
price: item.
|
|
238
|
-
description: first(item.
|
|
235
|
+
name: item.title || null,
|
|
236
|
+
price: item.subtitle || null,
|
|
237
|
+
description: first(item.paragraphs) || null,
|
|
239
238
|
features: firstList
|
|
240
239
|
? firstList
|
|
241
240
|
.map((listItem) =>
|
|
@@ -243,13 +242,9 @@ function pricing(parsed) {
|
|
|
243
242
|
)
|
|
244
243
|
.filter(Boolean)
|
|
245
244
|
: [],
|
|
246
|
-
cta:
|
|
247
|
-
first(item.body?.links) ||
|
|
248
|
-
first(item.body?.buttons) ||
|
|
249
|
-
null,
|
|
245
|
+
cta: first(item.links) || first(item.buttons) || null,
|
|
250
246
|
highlighted:
|
|
251
|
-
item.
|
|
252
|
-
false,
|
|
247
|
+
item.pretitle?.toLowerCase().includes("popular") || false,
|
|
253
248
|
};
|
|
254
249
|
})
|
|
255
250
|
.filter((tier) => tier.name);
|
|
@@ -263,17 +258,17 @@ function pricing(parsed) {
|
|
|
263
258
|
* @returns {Array} Team members
|
|
264
259
|
*/
|
|
265
260
|
function team(parsed) {
|
|
266
|
-
const items = parsed
|
|
261
|
+
const items = parsed?.items || [];
|
|
267
262
|
|
|
268
263
|
return items
|
|
269
264
|
.map((item) => ({
|
|
270
|
-
name: item.
|
|
271
|
-
role: item.
|
|
272
|
-
department: item.
|
|
273
|
-
bio: item.
|
|
274
|
-
image: first(item.
|
|
275
|
-
imageAlt: first(item.
|
|
276
|
-
links: item.
|
|
265
|
+
name: item.title || null,
|
|
266
|
+
role: item.subtitle || null,
|
|
267
|
+
department: item.pretitle || null,
|
|
268
|
+
bio: item.paragraphs || [],
|
|
269
|
+
image: first(item.imgs)?.url || null,
|
|
270
|
+
imageAlt: first(item.imgs)?.alt || null,
|
|
271
|
+
links: item.links || [],
|
|
277
272
|
}))
|
|
278
273
|
.filter((member) => member.name);
|
|
279
274
|
}
|
|
@@ -292,14 +287,14 @@ function gallery(parsed, options = {}) {
|
|
|
292
287
|
const images = [];
|
|
293
288
|
|
|
294
289
|
if (source === "main" || source === "all") {
|
|
295
|
-
const mainImages = parsed
|
|
290
|
+
const mainImages = parsed?.imgs || [];
|
|
296
291
|
images.push(...mainImages);
|
|
297
292
|
}
|
|
298
293
|
|
|
299
294
|
if (source === "items" || source === "all") {
|
|
300
|
-
const items = parsed
|
|
295
|
+
const items = parsed?.items || [];
|
|
301
296
|
items.forEach((item) => {
|
|
302
|
-
const itemImages = item.
|
|
297
|
+
const itemImages = item.imgs || [];
|
|
303
298
|
images.push(...itemImages);
|
|
304
299
|
});
|
|
305
300
|
}
|
|
@@ -315,26 +310,24 @@ function gallery(parsed, options = {}) {
|
|
|
315
310
|
* Extract content in legacy Article class format
|
|
316
311
|
* Used for backward compatibility with existing components
|
|
317
312
|
*
|
|
318
|
-
* This extractor transforms the new parser output into the
|
|
313
|
+
* This extractor transforms the new flat parser output into the nested format
|
|
319
314
|
* used by the legacy Article class, enabling drop-in replacement without
|
|
320
315
|
* breaking existing components.
|
|
321
316
|
*
|
|
322
|
-
* @param {Object} parsed - Parsed content from parseContent()
|
|
323
|
-
* @returns {Object} Legacy format { main, items }
|
|
317
|
+
* @param {Object} parsed - Parsed content from parseContent() (flat structure)
|
|
318
|
+
* @returns {Object} Legacy format { main, items } with nested header/body structure
|
|
324
319
|
*
|
|
325
320
|
* @example
|
|
326
321
|
* const { parseContent, mappers } = require('@uniweb/semantic-parser');
|
|
327
|
-
* const parsed = parseContent(doc
|
|
322
|
+
* const parsed = parseContent(doc);
|
|
328
323
|
* const legacy = mappers.extractors.legacy(parsed);
|
|
329
|
-
* // Returns: { main: {...}, items: [...] }
|
|
324
|
+
* // Returns: { main: { header: {...}, body: {...} }, items: [...] }
|
|
330
325
|
*/
|
|
331
326
|
function legacy(parsed) {
|
|
332
|
-
const
|
|
333
|
-
|
|
334
|
-
const transformGroup = (group) => {
|
|
335
|
-
if (!group) return null;
|
|
327
|
+
const transformToNested = (content) => {
|
|
328
|
+
if (!content) return null;
|
|
336
329
|
|
|
337
|
-
let imgs =
|
|
330
|
+
let imgs = content.imgs || [];
|
|
338
331
|
let banner = imgs.filter((item) => {
|
|
339
332
|
return (item.role = "banner");
|
|
340
333
|
})?.[0];
|
|
@@ -343,41 +336,41 @@ function legacy(parsed) {
|
|
|
343
336
|
|
|
344
337
|
return {
|
|
345
338
|
header: {
|
|
346
|
-
title:
|
|
347
|
-
subtitle:
|
|
348
|
-
subtitle2:
|
|
349
|
-
pretitle:
|
|
339
|
+
title: content.title || "",
|
|
340
|
+
subtitle: content.subtitle || "",
|
|
341
|
+
subtitle2: content.subtitle2 || "",
|
|
342
|
+
pretitle: content.pretitle || "",
|
|
350
343
|
// Auto-fill description (legacy behavior)
|
|
351
344
|
description:
|
|
352
|
-
|
|
353
|
-
first(
|
|
345
|
+
content.subtitle2 ||
|
|
346
|
+
first(content.paragraphs) ||
|
|
354
347
|
"",
|
|
355
|
-
alignment:
|
|
348
|
+
alignment: content.alignment || "",
|
|
356
349
|
},
|
|
357
350
|
banner,
|
|
358
351
|
body: {
|
|
359
|
-
paragraphs:
|
|
360
|
-
headings:
|
|
352
|
+
paragraphs: content.paragraphs || [],
|
|
353
|
+
headings: content.headings || [],
|
|
361
354
|
imgs,
|
|
362
|
-
videos:
|
|
363
|
-
lists:
|
|
364
|
-
links:
|
|
365
|
-
icons:
|
|
366
|
-
buttons:
|
|
367
|
-
cards:
|
|
368
|
-
documents:
|
|
369
|
-
forms:
|
|
370
|
-
form: first(
|
|
371
|
-
quotes:
|
|
372
|
-
properties:
|
|
373
|
-
propertyBlocks:
|
|
355
|
+
videos: content.videos || [],
|
|
356
|
+
lists: content.lists || [],
|
|
357
|
+
links: content.links || [],
|
|
358
|
+
icons: content.icons || [],
|
|
359
|
+
buttons: content.buttons || [],
|
|
360
|
+
cards: content.cards || [],
|
|
361
|
+
documents: content.documents || [],
|
|
362
|
+
forms: content.forms || [],
|
|
363
|
+
form: first(content.forms) || null,
|
|
364
|
+
quotes: content.quotes || [],
|
|
365
|
+
properties: content.properties || {},
|
|
366
|
+
propertyBlocks: content.propertyBlocks || [],
|
|
374
367
|
},
|
|
375
368
|
};
|
|
376
369
|
};
|
|
377
370
|
|
|
378
371
|
return {
|
|
379
|
-
main:
|
|
380
|
-
items: (
|
|
372
|
+
main: transformToNested(parsed),
|
|
373
|
+
items: (parsed?.items || []).map(transformToNested),
|
|
381
374
|
};
|
|
382
375
|
}
|
|
383
376
|
|
package/src/processors/groups.js
CHANGED
|
@@ -1,42 +1,113 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Flatten a group's nested structure to a flat object
|
|
3
|
+
* @param {Object} group Processed group with { header, body, metadata }
|
|
4
|
+
* @returns {Object} Flat content object
|
|
5
|
+
*/
|
|
6
|
+
function flattenGroup(group) {
|
|
7
|
+
if (!group) return null;
|
|
8
|
+
return {
|
|
9
|
+
title: group.header.title || '',
|
|
10
|
+
pretitle: group.header.pretitle || '',
|
|
11
|
+
subtitle: group.header.subtitle || '',
|
|
12
|
+
subtitle2: group.header.subtitle2 || '',
|
|
13
|
+
alignment: group.header.alignment || null,
|
|
14
|
+
paragraphs: group.body.paragraphs || [],
|
|
15
|
+
links: group.body.links || [],
|
|
16
|
+
imgs: group.body.imgs || [],
|
|
17
|
+
icons: group.body.icons || [],
|
|
18
|
+
lists: group.body.lists || [],
|
|
19
|
+
videos: group.body.videos || [],
|
|
20
|
+
buttons: group.body.buttons || [],
|
|
21
|
+
properties: group.body.properties || {},
|
|
22
|
+
propertyBlocks: group.body.propertyBlocks || [],
|
|
23
|
+
cards: group.body.cards || [],
|
|
24
|
+
documents: group.body.documents || [],
|
|
25
|
+
forms: group.body.forms || [],
|
|
26
|
+
quotes: group.body.quotes || [],
|
|
27
|
+
headings: group.body.headings || [],
|
|
28
|
+
};
|
|
29
|
+
}
|
|
30
|
+
|
|
1
31
|
/**
|
|
2
32
|
* Transform a sequence into content groups with semantic structure
|
|
3
33
|
* @param {Array} sequence Flat sequence of elements
|
|
4
34
|
* @param {Object} options Parsing options
|
|
5
|
-
* @returns {Object}
|
|
35
|
+
* @returns {Object} Flat content object with items array
|
|
6
36
|
*/
|
|
7
37
|
function processGroups(sequence, options = {}) {
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
38
|
+
// Empty content returns flat empty structure
|
|
39
|
+
if (!sequence.length) {
|
|
40
|
+
return {
|
|
41
|
+
title: '',
|
|
42
|
+
pretitle: '',
|
|
43
|
+
subtitle: '',
|
|
44
|
+
subtitle2: '',
|
|
45
|
+
alignment: null,
|
|
46
|
+
paragraphs: [],
|
|
47
|
+
links: [],
|
|
48
|
+
imgs: [],
|
|
49
|
+
icons: [],
|
|
50
|
+
lists: [],
|
|
51
|
+
videos: [],
|
|
52
|
+
buttons: [],
|
|
53
|
+
properties: {},
|
|
54
|
+
propertyBlocks: [],
|
|
55
|
+
cards: [],
|
|
56
|
+
documents: [],
|
|
57
|
+
forms: [],
|
|
58
|
+
quotes: [],
|
|
59
|
+
headings: [],
|
|
60
|
+
items: [],
|
|
61
|
+
};
|
|
62
|
+
}
|
|
18
63
|
|
|
19
64
|
const groups = splitBySlices(sequence);
|
|
20
65
|
|
|
21
|
-
// Process each group's structure
|
|
66
|
+
// Process each group's structure (still nested internally)
|
|
22
67
|
const processedGroups = groups.map((group) => processGroupContent(group));
|
|
23
68
|
|
|
24
|
-
//
|
|
25
|
-
|
|
26
|
-
|
|
69
|
+
// Determine main vs items
|
|
70
|
+
let mainGroup = null;
|
|
71
|
+
let itemGroups = [];
|
|
72
|
+
|
|
73
|
+
const shouldBeMain = identifyMainContent(processedGroups);
|
|
74
|
+
if (shouldBeMain) {
|
|
75
|
+
mainGroup = processedGroups[0];
|
|
76
|
+
itemGroups = processedGroups.slice(1);
|
|
27
77
|
} else {
|
|
28
|
-
|
|
29
|
-
const shouldBeMain = identifyMainContent(processedGroups);
|
|
30
|
-
if (shouldBeMain) {
|
|
31
|
-
result.main = processedGroups[0];
|
|
32
|
-
result.items = processedGroups.slice(1);
|
|
33
|
-
} else {
|
|
34
|
-
result.items = processedGroups;
|
|
35
|
-
}
|
|
78
|
+
itemGroups = processedGroups;
|
|
36
79
|
}
|
|
37
80
|
|
|
38
|
-
//
|
|
39
|
-
|
|
81
|
+
// Flatten main content (or return empty flat structure)
|
|
82
|
+
const flatMain = flattenGroup(mainGroup) || {
|
|
83
|
+
title: '',
|
|
84
|
+
pretitle: '',
|
|
85
|
+
subtitle: '',
|
|
86
|
+
subtitle2: '',
|
|
87
|
+
alignment: null,
|
|
88
|
+
paragraphs: [],
|
|
89
|
+
links: [],
|
|
90
|
+
imgs: [],
|
|
91
|
+
icons: [],
|
|
92
|
+
lists: [],
|
|
93
|
+
videos: [],
|
|
94
|
+
buttons: [],
|
|
95
|
+
properties: {},
|
|
96
|
+
propertyBlocks: [],
|
|
97
|
+
cards: [],
|
|
98
|
+
documents: [],
|
|
99
|
+
forms: [],
|
|
100
|
+
quotes: [],
|
|
101
|
+
headings: [],
|
|
102
|
+
};
|
|
103
|
+
|
|
104
|
+
// Flatten items
|
|
105
|
+
const flatItems = itemGroups.map(flattenGroup);
|
|
106
|
+
|
|
107
|
+
return {
|
|
108
|
+
...flatMain,
|
|
109
|
+
items: flatItems,
|
|
110
|
+
};
|
|
40
111
|
}
|
|
41
112
|
|
|
42
113
|
function splitBySlices(sequence) {
|
package/src/processors/byType.js
DELETED
|
@@ -1,130 +0,0 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* Organize content elements by their type while preserving context
|
|
3
|
-
* @param {Array} sequence Flat sequence of elements
|
|
4
|
-
* @returns {Object} Content organized by type
|
|
5
|
-
*/
|
|
6
|
-
function processByType(sequence) {
|
|
7
|
-
const collections = {
|
|
8
|
-
headings: [],
|
|
9
|
-
paragraphs: [],
|
|
10
|
-
images: {
|
|
11
|
-
background: [],
|
|
12
|
-
content: [],
|
|
13
|
-
gallery: [],
|
|
14
|
-
icon: [],
|
|
15
|
-
},
|
|
16
|
-
lists: [],
|
|
17
|
-
dividers: [],
|
|
18
|
-
metadata: {
|
|
19
|
-
totalElements: sequence.length,
|
|
20
|
-
dominantType: null,
|
|
21
|
-
hasMedia: false,
|
|
22
|
-
},
|
|
23
|
-
};
|
|
24
|
-
|
|
25
|
-
// Track type frequencies for metadata
|
|
26
|
-
const typeFrequency = new Map();
|
|
27
|
-
|
|
28
|
-
sequence.forEach((element, index) => {
|
|
29
|
-
// Track element type frequency
|
|
30
|
-
typeFrequency.set(element.type, (typeFrequency.get(element.type) || 0) + 1);
|
|
31
|
-
|
|
32
|
-
// Add context information
|
|
33
|
-
const context = getElementContext(sequence, index);
|
|
34
|
-
const enrichedElement = { ...element, context };
|
|
35
|
-
|
|
36
|
-
// Process element based on type
|
|
37
|
-
switch (element.type) {
|
|
38
|
-
case "heading":
|
|
39
|
-
collections.headings.push(enrichedElement);
|
|
40
|
-
break;
|
|
41
|
-
|
|
42
|
-
case "paragraph":
|
|
43
|
-
collections.paragraphs.push(enrichedElement);
|
|
44
|
-
break;
|
|
45
|
-
|
|
46
|
-
case "image": {
|
|
47
|
-
// Support both attrs.role and top-level role for backwards compatibility
|
|
48
|
-
const role = element.attrs?.role || element.role || "content";
|
|
49
|
-
if (!collections.images[role]) {
|
|
50
|
-
collections.images[role] = [];
|
|
51
|
-
}
|
|
52
|
-
collections.images[role].push(enrichedElement);
|
|
53
|
-
collections.metadata.hasMedia = true;
|
|
54
|
-
break;
|
|
55
|
-
}
|
|
56
|
-
|
|
57
|
-
case "list":
|
|
58
|
-
collections.lists.push(enrichedElement);
|
|
59
|
-
break;
|
|
60
|
-
|
|
61
|
-
case "divider":
|
|
62
|
-
collections.dividers.push(enrichedElement);
|
|
63
|
-
break;
|
|
64
|
-
}
|
|
65
|
-
});
|
|
66
|
-
|
|
67
|
-
// Calculate dominant type
|
|
68
|
-
let maxFrequency = 0;
|
|
69
|
-
typeFrequency.forEach((frequency, type) => {
|
|
70
|
-
if (frequency > maxFrequency) {
|
|
71
|
-
maxFrequency = frequency;
|
|
72
|
-
collections.metadata.dominantType = type;
|
|
73
|
-
}
|
|
74
|
-
});
|
|
75
|
-
|
|
76
|
-
// Add helper methods
|
|
77
|
-
addCollectionHelpers(collections);
|
|
78
|
-
|
|
79
|
-
return collections;
|
|
80
|
-
}
|
|
81
|
-
|
|
82
|
-
/**
|
|
83
|
-
* Get context information for an element
|
|
84
|
-
*/
|
|
85
|
-
function getElementContext(sequence, position) {
|
|
86
|
-
const context = {
|
|
87
|
-
position,
|
|
88
|
-
previousElement: position > 0 ? sequence[position - 1] : null,
|
|
89
|
-
nextElement: position < sequence.length - 1 ? sequence[position + 1] : null,
|
|
90
|
-
nearestHeading: null,
|
|
91
|
-
};
|
|
92
|
-
|
|
93
|
-
// Find nearest preceding heading
|
|
94
|
-
for (let i = position - 1; i >= 0; i--) {
|
|
95
|
-
if (sequence[i].type === "heading") {
|
|
96
|
-
context.nearestHeading = sequence[i];
|
|
97
|
-
break;
|
|
98
|
-
}
|
|
99
|
-
}
|
|
100
|
-
|
|
101
|
-
return context;
|
|
102
|
-
}
|
|
103
|
-
|
|
104
|
-
/**
|
|
105
|
-
* Add helper methods to collections
|
|
106
|
-
*/
|
|
107
|
-
function addCollectionHelpers(collections) {
|
|
108
|
-
// Get headings of specific level
|
|
109
|
-
collections.getHeadingsByLevel = function (level) {
|
|
110
|
-
return this.headings.filter((h) => h.level === level);
|
|
111
|
-
};
|
|
112
|
-
|
|
113
|
-
// Get elements by heading context
|
|
114
|
-
collections.getElementsByHeadingContext = function (headingFilter) {
|
|
115
|
-
const allElements = [
|
|
116
|
-
...this.paragraphs,
|
|
117
|
-
...Object.values(this.images).flat(),
|
|
118
|
-
...this.lists,
|
|
119
|
-
];
|
|
120
|
-
|
|
121
|
-
return allElements.filter(
|
|
122
|
-
(el) =>
|
|
123
|
-
el.context?.nearestHeading && headingFilter(el.context.nearestHeading)
|
|
124
|
-
);
|
|
125
|
-
};
|
|
126
|
-
}
|
|
127
|
-
|
|
128
|
-
export {
|
|
129
|
-
processByType
|
|
130
|
-
};
|