@uniweb/semantic-parser 1.1.4 → 1.1.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/AGENTS.md +5 -4
- package/README.md +3 -3
- package/docs/api.md +2 -2
- package/docs/entity-consolidation.md +2 -2
- package/docs/mapping-patterns.md +11 -11
- package/package.json +1 -1
- package/src/mappers/accessor.js +2 -2
- package/src/mappers/extractors.js +17 -17
- package/src/processors/groups.js +15 -7
- package/src/processors/groups_backup.js +2 -2
package/AGENTS.md
CHANGED
|
@@ -64,12 +64,13 @@ The parser returns a flat content structure:
|
|
|
64
64
|
subtitle2: '', // Third heading level
|
|
65
65
|
paragraphs: [],
|
|
66
66
|
links: [], // All link-like entities (including buttons, documents)
|
|
67
|
-
|
|
67
|
+
images: [],
|
|
68
68
|
icons: [],
|
|
69
69
|
videos: [],
|
|
70
70
|
lists: [],
|
|
71
71
|
quotes: [],
|
|
72
|
-
|
|
72
|
+
snippets: [], // Fenced code — [{ language, code }]
|
|
73
|
+
data: {}, // Structured data (tagged data blocks, forms, cards)
|
|
73
74
|
headings: [], // Overflow headings after title/subtitle/subtitle2
|
|
74
75
|
items: [], // Child content groups (same structure recursively)
|
|
75
76
|
}
|
|
@@ -127,9 +128,9 @@ Editor-specific nodes are mapped to standard entities:
|
|
|
127
128
|
|
|
128
129
|
See `docs/entity-consolidation.md` for complete mapping documentation.
|
|
129
130
|
|
|
130
|
-
### Tagged
|
|
131
|
+
### Tagged Data Blocks
|
|
131
132
|
|
|
132
|
-
|
|
133
|
+
Data blocks with tags route parsed data to the `data` object:
|
|
133
134
|
|
|
134
135
|
```markdown
|
|
135
136
|
```yaml:nav-links
|
package/README.md
CHANGED
|
@@ -73,7 +73,7 @@ result = {
|
|
|
73
73
|
// Body fields
|
|
74
74
|
paragraphs: ["Get started today."],
|
|
75
75
|
links: [], // All links (including buttons, documents)
|
|
76
|
-
|
|
76
|
+
images: [],
|
|
77
77
|
videos: [],
|
|
78
78
|
icons: [],
|
|
79
79
|
lists: [],
|
|
@@ -164,7 +164,7 @@ const schema = {
|
|
|
164
164
|
maxLength: 150
|
|
165
165
|
},
|
|
166
166
|
image: {
|
|
167
|
-
path: "
|
|
167
|
+
path: "images[0].url",
|
|
168
168
|
type: "image",
|
|
169
169
|
defaultValue: "/placeholder.jpg"
|
|
170
170
|
}
|
|
@@ -214,7 +214,7 @@ const schema = {
|
|
|
214
214
|
title: "title",
|
|
215
215
|
subtitle: "subtitle",
|
|
216
216
|
image: {
|
|
217
|
-
path: "
|
|
217
|
+
path: "images[0].url",
|
|
218
218
|
defaultValue: "/placeholder.jpg"
|
|
219
219
|
},
|
|
220
220
|
actions: {
|
package/docs/api.md
CHANGED
|
@@ -130,7 +130,7 @@ Content organized into semantic groups with identified main content and items. T
|
|
|
130
130
|
|
|
131
131
|
// Body fields (flat)
|
|
132
132
|
paragraphs: ["paragraph text", ...],
|
|
133
|
-
|
|
133
|
+
images: [
|
|
134
134
|
{ url: "...", caption: "...", alt: "..." }
|
|
135
135
|
],
|
|
136
136
|
icons: ["<svg>...</svg>", ...],
|
|
@@ -160,7 +160,7 @@ Content organized into semantic groups with identified main content and items. T
|
|
|
160
160
|
},
|
|
161
161
|
items: [
|
|
162
162
|
// Array of groups with same flat structure as main
|
|
163
|
-
// { title, pretitle, subtitle, paragraphs,
|
|
163
|
+
// { title, pretitle, subtitle, paragraphs, images, ... }
|
|
164
164
|
],
|
|
165
165
|
metadata: {
|
|
166
166
|
dividerMode: false, // Whether dividers were used for grouping
|
|
package/docs/entity-consolidation.md
CHANGED
|
@@ -29,7 +29,7 @@ After consolidation, the parser outputs this flat structure:
|
|
|
29
29
|
// Body fields
|
|
30
30
|
paragraphs: [], // Text blocks with inline HTML formatting
|
|
31
31
|
links: [], // All link-like entities (buttons, documents, nav links)
|
|
32
|
-
|
|
32
|
+
images: [], // All images (with role distinguishing purpose)
|
|
33
33
|
videos: [], // Video embeds
|
|
34
34
|
icons: [], // Standalone icons
|
|
35
35
|
lists: [], // Bullet/ordered lists (recursive structure)
|
|
@@ -94,7 +94,7 @@ All link-like content merges into the `links` array. The `role` attribute distin
|
|
|
94
94
|
|
|
95
95
|
### Images
|
|
96
96
|
|
|
97
|
-
All image content uses the `
|
|
97
|
+
All image content uses the `images` array. The `role` attribute distinguishes purpose.
|
|
98
98
|
|
|
99
99
|
```js
|
|
100
100
|
{
|
package/docs/mapping-patterns.md
CHANGED
|
@@ -37,7 +37,7 @@ const schema = {
|
|
|
37
37
|
maxLength: 150
|
|
38
38
|
},
|
|
39
39
|
image: {
|
|
40
|
-
path: "groups.main.
|
|
40
|
+
path: "groups.main.images[0].url",
|
|
41
41
|
type: "image", // Normalizes image data
|
|
42
42
|
defaultValue: "/placeholder.jpg",
|
|
43
43
|
treatEmptyAsDefault: true
|
|
@@ -152,7 +152,7 @@ Normalizes image data structure.
|
|
|
152
152
|
```js
|
|
153
153
|
{
|
|
154
154
|
image: {
|
|
155
|
-
path: "groups.main.
|
|
155
|
+
path: "groups.main.images[0]",
|
|
156
156
|
type: "image",
|
|
157
157
|
defaultValue: "/placeholder.jpg",
|
|
158
158
|
defaultAlt: "Image"
|
|
@@ -234,7 +234,7 @@ const componentSchema = {
|
|
|
234
234
|
maxLength: 200
|
|
235
235
|
},
|
|
236
236
|
image: {
|
|
237
|
-
path: "groups.main.
|
|
237
|
+
path: "groups.main.images[0].url",
|
|
238
238
|
type: "image",
|
|
239
239
|
defaultValue: "/placeholder.jpg"
|
|
240
240
|
},
|
|
@@ -273,7 +273,7 @@ const heroData = mappers.extractors.hero(parsed);
|
|
|
273
273
|
// Or use schema-based extraction
|
|
274
274
|
const customData = mappers.extractBySchema(parsed, {
|
|
275
275
|
title: "groups.main.title",
|
|
276
|
-
image: { path: "groups.main.
|
|
276
|
+
image: { path: "groups.main.images[0].url", defaultValue: "/placeholder.jpg" }
|
|
277
277
|
});
|
|
278
278
|
```
|
|
279
279
|
|
|
@@ -354,10 +354,10 @@ const { accessor } = mappers;
|
|
|
354
354
|
const title = accessor.getByPath(parsed, "groups.main.title");
|
|
355
355
|
|
|
356
356
|
// Array index notation
|
|
357
|
-
const firstImage = accessor.getByPath(parsed, "groups.main.
|
|
357
|
+
const firstImage = accessor.getByPath(parsed, "groups.main.images[0].url");
|
|
358
358
|
|
|
359
359
|
// With default value
|
|
360
|
-
const image = accessor.getByPath(parsed, "groups.main.
|
|
360
|
+
const image = accessor.getByPath(parsed, "groups.main.images[0].url", {
|
|
361
361
|
defaultValue: "/placeholder.jpg"
|
|
362
362
|
});
|
|
363
363
|
|
|
@@ -383,7 +383,7 @@ const schema = {
|
|
|
383
383
|
|
|
384
384
|
// Full config with options
|
|
385
385
|
image: {
|
|
386
|
-
path: "groups.main.
|
|
386
|
+
path: "groups.main.images[0].url",
|
|
387
387
|
defaultValue: "/placeholder.jpg"
|
|
388
388
|
},
|
|
389
389
|
|
|
@@ -420,7 +420,7 @@ const titles = accessor.mapArray(parsed, "groups.items", "title");
|
|
|
420
420
|
const cards = accessor.mapArray(parsed, "groups.items", {
|
|
421
421
|
title: "title",
|
|
422
422
|
text: { path: "paragraphs", transform: p => p.join(" ") },
|
|
423
|
-
image: { path: "
|
|
423
|
+
image: { path: "images[0].url", defaultValue: "/default.jpg" }
|
|
424
424
|
});
|
|
425
425
|
// [
|
|
426
426
|
// { title: "...", text: "...", image: "..." },
|
|
@@ -439,8 +439,8 @@ if (accessor.hasPath(parsed, "groups.main.banner.url")) {
|
|
|
439
439
|
// Get first existing path (flat structure)
|
|
440
440
|
const image = accessor.getFirstExisting(parsed, [
|
|
441
441
|
"groups.main.banner.url",
|
|
442
|
-
"groups.main.
|
|
443
|
-
"groups.items[0].
|
|
442
|
+
"groups.main.images[0].url",
|
|
443
|
+
"groups.items[0].images[0].url"
|
|
444
444
|
], "/fallback.jpg");
|
|
445
445
|
```
|
|
446
446
|
|
|
@@ -666,7 +666,7 @@ const componentSchema = {
|
|
|
666
666
|
brand: "groups.main.pretitle",
|
|
667
667
|
title: "groups.main.title",
|
|
668
668
|
subtitle: "groups.main.subtitle",
|
|
669
|
-
image: { path: "groups.main.
|
|
669
|
+
image: { path: "groups.main.images[0].url", defaultValue: "/default.jpg" },
|
|
670
670
|
actions: {
|
|
671
671
|
path: "groups.main.links",
|
|
672
672
|
transform: links => links.map(l => ({ label: l.label, type: "primary" }))
|
package/package.json
CHANGED
package/src/mappers/accessor.js
CHANGED
|
@@ -6,7 +6,7 @@ import { applyType, validateType } from './types.js';
|
|
|
6
6
|
|
|
7
7
|
/**
|
|
8
8
|
* Parse a path string into segments, handling array indices
|
|
9
|
-
* @param {string} path - Path string (e.g., 'groups.main.body.
|
|
9
|
+
* @param {string} path - Path string (e.g., 'groups.main.body.images[0].url')
|
|
10
10
|
* @returns {Array} Array of path segments
|
|
11
11
|
*/
|
|
12
12
|
function parsePath(path) {
|
|
@@ -118,7 +118,7 @@ function getByPath(parsed, path, options = {}) {
|
|
|
118
118
|
* maxLength: 60
|
|
119
119
|
* },
|
|
120
120
|
* image: {
|
|
121
|
-
* path: 'groups.main.body.
|
|
121
|
+
* path: 'groups.main.body.images[0].url',
|
|
122
122
|
* type: 'image',
|
|
123
123
|
* defaultValue: '/placeholder.jpg'
|
|
124
124
|
* },
|
|
package/src/mappers/extractors.js
CHANGED
|
@@ -2,7 +2,7 @@
|
|
|
2
2
|
* Pre-built extractors for common component patterns
|
|
3
3
|
*
|
|
4
4
|
* All extractors work with the flat content structure:
|
|
5
|
-
* - Root level: title, pretitle, subtitle, paragraphs, links,
|
|
5
|
+
* - Root level: title, pretitle, subtitle, paragraphs, links, images, items, etc.
|
|
6
6
|
* - Items array: each item has flat structure (title, paragraphs, etc.)
|
|
7
7
|
*/
|
|
8
8
|
|
|
@@ -25,8 +25,8 @@ function hero(parsed) {
|
|
|
25
25
|
subtitle: parsed?.subtitle || null,
|
|
26
26
|
kicker: parsed?.pretitle || null,
|
|
27
27
|
description: parsed?.paragraphs || [],
|
|
28
|
-
image: first(parsed?.
|
|
29
|
-
imageAlt: first(parsed?.
|
|
28
|
+
image: first(parsed?.images)?.url || null,
|
|
29
|
+
imageAlt: first(parsed?.images)?.alt || null,
|
|
30
30
|
banner: null, // Banner detection would need to be added separately
|
|
31
31
|
cta: buttonLink || plainLink || null,
|
|
32
32
|
};
|
|
@@ -56,8 +56,8 @@ function card(parsed, options = {}) {
|
|
|
56
56
|
title: content.title || null,
|
|
57
57
|
subtitle: content.subtitle || null,
|
|
58
58
|
description: content.paragraphs || [],
|
|
59
|
-
image: first(content.
|
|
60
|
-
imageAlt: first(content.
|
|
59
|
+
image: first(content.images)?.url || null,
|
|
60
|
+
imageAlt: first(content.images)?.alt || null,
|
|
61
61
|
icon: first(content.icons) || null,
|
|
62
62
|
link: plainLink || null,
|
|
63
63
|
cta: buttonLink || plainLink || null,
|
|
@@ -91,7 +91,7 @@ function article(parsed) {
|
|
|
91
91
|
date: null, // Would need metadata support
|
|
92
92
|
banner: null, // Banner detection would need to be added separately
|
|
93
93
|
content: parsed?.paragraphs || [],
|
|
94
|
-
images: parsed?.
|
|
94
|
+
images: parsed?.images || [],
|
|
95
95
|
videos: parsed?.videos || [],
|
|
96
96
|
links: parsed?.links || [],
|
|
97
97
|
};
|
|
@@ -166,7 +166,7 @@ function features(parsed) {
|
|
|
166
166
|
subtitle: item.subtitle || null,
|
|
167
167
|
description: item.paragraphs || [],
|
|
168
168
|
icon: first(item.icons) || null,
|
|
169
|
-
image: first(item.
|
|
169
|
+
image: first(item.images)?.url || null,
|
|
170
170
|
link: first(item.links) || null,
|
|
171
171
|
}))
|
|
172
172
|
.filter((feature) => feature.title);
|
|
@@ -192,8 +192,8 @@ function testimonial(parsed, options = {}) {
|
|
|
192
192
|
author: content.title || null,
|
|
193
193
|
role: content.subtitle || null,
|
|
194
194
|
company: content.pretitle || null,
|
|
195
|
-
image: first(content.
|
|
196
|
-
imageAlt: first(content.
|
|
195
|
+
image: first(content.images)?.url || null,
|
|
196
|
+
imageAlt: first(content.images)?.alt || null,
|
|
197
197
|
};
|
|
198
198
|
};
|
|
199
199
|
|
|
@@ -275,8 +275,8 @@ function team(parsed) {
|
|
|
275
275
|
role: item.subtitle || null,
|
|
276
276
|
department: item.pretitle || null,
|
|
277
277
|
bio: item.paragraphs || [],
|
|
278
|
-
image: first(item.
|
|
279
|
-
imageAlt: first(item.
|
|
278
|
+
image: first(item.images)?.url || null,
|
|
279
|
+
imageAlt: first(item.images)?.alt || null,
|
|
280
280
|
links: item.links || [],
|
|
281
281
|
}))
|
|
282
282
|
.filter((member) => member.name);
|
|
@@ -296,14 +296,14 @@ function gallery(parsed, options = {}) {
|
|
|
296
296
|
const images = [];
|
|
297
297
|
|
|
298
298
|
if (source === "main" || source === "all") {
|
|
299
|
-
const mainImages = parsed?.
|
|
299
|
+
const mainImages = parsed?.images || [];
|
|
300
300
|
images.push(...mainImages);
|
|
301
301
|
}
|
|
302
302
|
|
|
303
303
|
if (source === "items" || source === "all") {
|
|
304
304
|
const items = parsed?.items || [];
|
|
305
305
|
items.forEach((item) => {
|
|
306
|
-
const itemImages = item.
|
|
306
|
+
const itemImages = item.images || [];
|
|
307
307
|
images.push(...itemImages);
|
|
308
308
|
});
|
|
309
309
|
}
|
|
@@ -339,12 +339,12 @@ function legacy(parsed) {
|
|
|
339
339
|
const transformToNested = (content) => {
|
|
340
340
|
if (!content) return null;
|
|
341
341
|
|
|
342
|
-
let
|
|
343
|
-
let banner =
|
|
342
|
+
let images = content.images || [];
|
|
343
|
+
let banner = images.filter((item) => {
|
|
344
344
|
return (item.role = "banner");
|
|
345
345
|
})?.[0];
|
|
346
346
|
|
|
347
|
-
if (!banner) banner =
|
|
347
|
+
if (!banner) banner = images[0];
|
|
348
348
|
|
|
349
349
|
// Reconstruct deprecated fields from new structure
|
|
350
350
|
const links = content.links || [];
|
|
@@ -377,7 +377,7 @@ function legacy(parsed) {
|
|
|
377
377
|
body: {
|
|
378
378
|
paragraphs: content.paragraphs || [],
|
|
379
379
|
headings: content.headings || [],
|
|
380
|
-
|
|
380
|
+
images,
|
|
381
381
|
videos: content.videos || [],
|
|
382
382
|
lists: content.lists || [],
|
|
383
383
|
links: plainLinks,
|
package/src/processors/groups.js
CHANGED
|
@@ -12,11 +12,12 @@ function flattenGroup(group) {
|
|
|
12
12
|
subtitle2: group.header.subtitle2 || '',
|
|
13
13
|
paragraphs: group.body.paragraphs || [],
|
|
14
14
|
links: group.body.links || [],
|
|
15
|
-
|
|
15
|
+
images: group.body.images || [],
|
|
16
16
|
icons: group.body.icons || [],
|
|
17
17
|
lists: group.body.lists || [],
|
|
18
18
|
videos: group.body.videos || [],
|
|
19
19
|
insets: group.body.insets || [],
|
|
20
|
+
snippets: group.body.snippets || [],
|
|
20
21
|
data: group.body.data || {},
|
|
21
22
|
quotes: group.body.quotes || [],
|
|
22
23
|
headings: group.body.headings || [],
|
|
@@ -39,11 +40,12 @@ function processGroups(sequence, options = {}) {
|
|
|
39
40
|
subtitle2: '',
|
|
40
41
|
paragraphs: [],
|
|
41
42
|
links: [],
|
|
42
|
-
|
|
43
|
+
images: [],
|
|
43
44
|
icons: [],
|
|
44
45
|
lists: [],
|
|
45
46
|
videos: [],
|
|
46
47
|
insets: [],
|
|
48
|
+
snippets: [],
|
|
47
49
|
data: {},
|
|
48
50
|
quotes: [],
|
|
49
51
|
headings: [],
|
|
@@ -76,7 +78,7 @@ function processGroups(sequence, options = {}) {
|
|
|
76
78
|
subtitle2: '',
|
|
77
79
|
paragraphs: [],
|
|
78
80
|
links: [],
|
|
79
|
-
|
|
81
|
+
images: [],
|
|
80
82
|
icons: [],
|
|
81
83
|
lists: [],
|
|
82
84
|
videos: [],
|
|
@@ -229,10 +231,11 @@ function processGroupContent(elements) {
|
|
|
229
231
|
};
|
|
230
232
|
|
|
231
233
|
const body = {
|
|
232
|
-
|
|
234
|
+
images: [],
|
|
233
235
|
icons: [],
|
|
234
236
|
videos: [],
|
|
235
237
|
insets: [],
|
|
238
|
+
snippets: [],
|
|
236
239
|
paragraphs: [],
|
|
237
240
|
links: [],
|
|
238
241
|
lists: [],
|
|
@@ -320,7 +323,7 @@ function processGroupContent(elements) {
|
|
|
320
323
|
if (element.attrs?.role === "icon") {
|
|
321
324
|
body.icons.push(element.attrs);
|
|
322
325
|
} else {
|
|
323
|
-
body.
|
|
326
|
+
body.images.push(preserveProps);
|
|
324
327
|
}
|
|
325
328
|
break;
|
|
326
329
|
|
|
@@ -366,11 +369,16 @@ function processGroupContent(elements) {
|
|
|
366
369
|
break;
|
|
367
370
|
|
|
368
371
|
case "codeBlock":
|
|
369
|
-
// Fallback: tagged code blocks where parsing failed at build time
|
|
370
|
-
// Untagged blocks stay in sequence for display
|
|
371
372
|
const tag = element.attrs?.tag;
|
|
372
373
|
if (tag) {
|
|
374
|
+
// Tagged block where parsing failed at build time — store as data
|
|
373
375
|
body.data[tag] = element.text;
|
|
376
|
+
} else {
|
|
377
|
+
// Untagged code block — collect as a snippet
|
|
378
|
+
body.snippets.push({
|
|
379
|
+
language: element.attrs?.language || '',
|
|
380
|
+
code: typeof element.text === 'string' ? element.text : '',
|
|
381
|
+
});
|
|
374
382
|
}
|
|
375
383
|
break;
|
|
376
384
|
|
|
package/src/processors/groups_backup.js
CHANGED
|
@@ -187,7 +187,7 @@ function processGroupContent(elements) {
|
|
|
187
187
|
};
|
|
188
188
|
let banner = null;
|
|
189
189
|
const body = {
|
|
190
|
-
|
|
190
|
+
images: [],
|
|
191
191
|
icons: [],
|
|
192
192
|
videos: [],
|
|
193
193
|
paragraphs: [],
|
|
@@ -259,7 +259,7 @@ function processGroupContent(elements) {
|
|
|
259
259
|
break;
|
|
260
260
|
|
|
261
261
|
case "image":
|
|
262
|
-
body.
|
|
262
|
+
body.images.push({
|
|
263
263
|
url: element.src,
|
|
264
264
|
caption: element.caption,
|
|
265
265
|
alt: element.alt,
|