@uniweb/semantic-parser 1.0.7 → 1.0.9
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/AGENTS.md +136 -0
- package/README.md +52 -104
- package/docs/api.md +38 -40
- package/docs/mapping-patterns.md +47 -47
- package/docs/text-component-reference.md +3 -3
- package/package.json +1 -1
- package/src/index.js +5 -7
- package/src/mappers/extractors.js +113 -120
- package/src/processors/groups.js +96 -25
- package/src/processors/byType.js +0 -130
package/docs/mapping-patterns.md
CHANGED
|
@@ -27,17 +27,17 @@ Gracefully handles content issues with silent, automatic cleanup:
|
|
|
27
27
|
```js
|
|
28
28
|
const schema = {
|
|
29
29
|
title: {
|
|
30
|
-
path: "groups.main.
|
|
30
|
+
path: "groups.main.title",
|
|
31
31
|
type: "plaintext", // Auto-strips HTML markup
|
|
32
32
|
maxLength: 60 // Auto-truncates with smart boundaries
|
|
33
33
|
},
|
|
34
34
|
description: {
|
|
35
|
-
path: "groups.main.
|
|
35
|
+
path: "groups.main.paragraphs",
|
|
36
36
|
type: "excerpt", // Auto-creates excerpt from paragraphs
|
|
37
37
|
maxLength: 150
|
|
38
38
|
},
|
|
39
39
|
image: {
|
|
40
|
-
path: "groups.main.
|
|
40
|
+
path: "groups.main.imgs[0].url",
|
|
41
41
|
type: "image", // Normalizes image data
|
|
42
42
|
defaultValue: "/placeholder.jpg",
|
|
43
43
|
treatEmptyAsDefault: true
|
|
@@ -74,7 +74,7 @@ Strips all HTML markup, returning clean text. Perfect for titles, labels, and an
|
|
|
74
74
|
```js
|
|
75
75
|
{
|
|
76
76
|
title: {
|
|
77
|
-
path: "groups.main.
|
|
77
|
+
path: "groups.main.title",
|
|
78
78
|
type: "plaintext",
|
|
79
79
|
maxLength: 60, // Auto-truncate
|
|
80
80
|
boundary: "word", // or "sentence", "character"
|
|
@@ -94,7 +94,7 @@ Preserves safe HTML while removing dangerous tags (script, iframe, etc.).
|
|
|
94
94
|
```js
|
|
95
95
|
{
|
|
96
96
|
description: {
|
|
97
|
-
path: "groups.main.
|
|
97
|
+
path: "groups.main.paragraphs[0]",
|
|
98
98
|
type: "richtext",
|
|
99
99
|
allowedTags: ["strong", "em", "a", "br"], // Customize allowed tags
|
|
100
100
|
stripTags: ["script", "style"] // Additional tags to remove
|
|
@@ -112,7 +112,7 @@ Auto-generates excerpt from content, stripping markup and truncating intelligent
|
|
|
112
112
|
```js
|
|
113
113
|
{
|
|
114
114
|
excerpt: {
|
|
115
|
-
path: "groups.main.
|
|
115
|
+
path: "groups.main.paragraphs",
|
|
116
116
|
type: "excerpt",
|
|
117
117
|
maxLength: 150,
|
|
118
118
|
boundary: "word", // or "sentence"
|
|
@@ -131,7 +131,7 @@ Parses and optionally formats numbers.
|
|
|
131
131
|
```js
|
|
132
132
|
{
|
|
133
133
|
price: {
|
|
134
|
-
path: "groups.main.
|
|
134
|
+
path: "groups.main.title",
|
|
135
135
|
type: "number",
|
|
136
136
|
format: {
|
|
137
137
|
decimals: 2,
|
|
@@ -152,7 +152,7 @@ Normalizes image data structure.
|
|
|
152
152
|
```js
|
|
153
153
|
{
|
|
154
154
|
image: {
|
|
155
|
-
path: "groups.main.
|
|
155
|
+
path: "groups.main.imgs[0]",
|
|
156
156
|
type: "image",
|
|
157
157
|
defaultValue: "/placeholder.jpg",
|
|
158
158
|
defaultAlt: "Image"
|
|
@@ -170,7 +170,7 @@ Normalizes link data structure.
|
|
|
170
170
|
```js
|
|
171
171
|
{
|
|
172
172
|
cta: {
|
|
173
|
-
path: "groups.main.
|
|
173
|
+
path: "groups.main.links[0]",
|
|
174
174
|
type: "link"
|
|
175
175
|
}
|
|
176
176
|
}
|
|
@@ -212,34 +212,34 @@ const hints = mappers.validateSchema(parsed, schema, { mode: 'visual-editor' });
|
|
|
212
212
|
// Component declares its content requirements
|
|
213
213
|
const componentSchema = {
|
|
214
214
|
brand: {
|
|
215
|
-
path: "groups.main.
|
|
215
|
+
path: "groups.main.pretitle",
|
|
216
216
|
type: "plaintext",
|
|
217
217
|
maxLength: 20,
|
|
218
218
|
transform: (text) => text.toUpperCase()
|
|
219
219
|
},
|
|
220
220
|
title: {
|
|
221
|
-
path: "groups.main.
|
|
221
|
+
path: "groups.main.title",
|
|
222
222
|
type: "plaintext",
|
|
223
223
|
maxLength: 60,
|
|
224
224
|
required: true
|
|
225
225
|
},
|
|
226
226
|
subtitle: {
|
|
227
|
-
path: "groups.main.
|
|
227
|
+
path: "groups.main.subtitle",
|
|
228
228
|
type: "plaintext",
|
|
229
229
|
maxLength: 100
|
|
230
230
|
},
|
|
231
231
|
description: {
|
|
232
|
-
path: "groups.main.
|
|
232
|
+
path: "groups.main.paragraphs",
|
|
233
233
|
type: "excerpt",
|
|
234
234
|
maxLength: 200
|
|
235
235
|
},
|
|
236
236
|
image: {
|
|
237
|
-
path: "groups.main.
|
|
237
|
+
path: "groups.main.imgs[0].url",
|
|
238
238
|
type: "image",
|
|
239
239
|
defaultValue: "/placeholder.jpg"
|
|
240
240
|
},
|
|
241
241
|
cta: {
|
|
242
|
-
path: "groups.main.
|
|
242
|
+
path: "groups.main.links[0]",
|
|
243
243
|
type: "link"
|
|
244
244
|
}
|
|
245
245
|
};
|
|
@@ -272,8 +272,8 @@ const heroData = mappers.extractors.hero(parsed);
|
|
|
272
272
|
|
|
273
273
|
// Or use schema-based extraction
|
|
274
274
|
const customData = mappers.extractBySchema(parsed, {
|
|
275
|
-
title: "groups.main.
|
|
276
|
-
image: { path: "groups.main.
|
|
275
|
+
title: "groups.main.title",
|
|
276
|
+
image: { path: "groups.main.imgs[0].url", defaultValue: "/placeholder.jpg" }
|
|
277
277
|
});
|
|
278
278
|
```
|
|
279
279
|
|
|
@@ -291,7 +291,7 @@ const image = helpers.first(images, "/default.jpg");
|
|
|
291
291
|
const lastParagraph = helpers.last(paragraphs);
|
|
292
292
|
|
|
293
293
|
// Transform array
|
|
294
|
-
const titles = helpers.transformArray(items, item => item.
|
|
294
|
+
const titles = helpers.transformArray(items, item => item.title);
|
|
295
295
|
|
|
296
296
|
// Filter and transform
|
|
297
297
|
const h2s = helpers.filterArray(headings, h => h.level === 2, h => h.content);
|
|
@@ -308,7 +308,7 @@ const cleanArray = helpers.compact([null, "text", "", undefined, "more"]);
|
|
|
308
308
|
|
|
309
309
|
```js
|
|
310
310
|
// Get nested value safely
|
|
311
|
-
const title = helpers.get(parsed, "groups.main.
|
|
311
|
+
const title = helpers.get(parsed, "groups.main.title", "Untitled");
|
|
312
312
|
|
|
313
313
|
// Pick specific properties
|
|
314
314
|
const metadata = helpers.pick(parsed.groups.main, ["header", "banner"]);
|
|
@@ -337,7 +337,7 @@ if (!validation.valid) {
|
|
|
337
337
|
```js
|
|
338
338
|
// Wrap extraction in try-catch
|
|
339
339
|
const safeExtractor = helpers.safe((parsed) => {
|
|
340
|
-
return parsed.groups.main.
|
|
340
|
+
return parsed.groups.main.title.toUpperCase();
|
|
341
341
|
}, "DEFAULT");
|
|
342
342
|
|
|
343
343
|
const title = safeExtractor(parsed); // Won't throw if path is invalid
|
|
@@ -350,24 +350,24 @@ const title = safeExtractor(parsed); // Won't throw if path is invalid
|
|
|
350
350
|
```js
|
|
351
351
|
const { accessor } = mappers;
|
|
352
352
|
|
|
353
|
-
// Simple path
|
|
354
|
-
const title = accessor.getByPath(parsed, "groups.main.
|
|
353
|
+
// Simple path (flat structure)
|
|
354
|
+
const title = accessor.getByPath(parsed, "groups.main.title");
|
|
355
355
|
|
|
356
356
|
// Array index notation
|
|
357
|
-
const firstImage = accessor.getByPath(parsed, "groups.main.
|
|
357
|
+
const firstImage = accessor.getByPath(parsed, "groups.main.imgs[0].url");
|
|
358
358
|
|
|
359
359
|
// With default value
|
|
360
|
-
const image = accessor.getByPath(parsed, "groups.main.
|
|
360
|
+
const image = accessor.getByPath(parsed, "groups.main.imgs[0].url", {
|
|
361
361
|
defaultValue: "/placeholder.jpg"
|
|
362
362
|
});
|
|
363
363
|
|
|
364
364
|
// With transformation
|
|
365
|
-
const description = accessor.getByPath(parsed, "groups.main.
|
|
365
|
+
const description = accessor.getByPath(parsed, "groups.main.paragraphs", {
|
|
366
366
|
transform: (paragraphs) => paragraphs.join(" ")
|
|
367
367
|
});
|
|
368
368
|
|
|
369
369
|
// Required field (throws if missing)
|
|
370
|
-
const title = accessor.getByPath(parsed, "groups.main.
|
|
370
|
+
const title = accessor.getByPath(parsed, "groups.main.title", {
|
|
371
371
|
required: true
|
|
372
372
|
});
|
|
373
373
|
```
|
|
@@ -378,22 +378,22 @@ Extract multiple fields at once using a schema:
|
|
|
378
378
|
|
|
379
379
|
```js
|
|
380
380
|
const schema = {
|
|
381
|
-
// Shorthand: just the path
|
|
382
|
-
title: "groups.main.
|
|
381
|
+
// Shorthand: just the path (flat structure)
|
|
382
|
+
title: "groups.main.title",
|
|
383
383
|
|
|
384
384
|
// Full config with options
|
|
385
385
|
image: {
|
|
386
|
-
path: "groups.main.
|
|
386
|
+
path: "groups.main.imgs[0].url",
|
|
387
387
|
defaultValue: "/placeholder.jpg"
|
|
388
388
|
},
|
|
389
389
|
|
|
390
390
|
description: {
|
|
391
|
-
path: "groups.main.
|
|
391
|
+
path: "groups.main.paragraphs",
|
|
392
392
|
transform: (p) => p.join(" ")
|
|
393
393
|
},
|
|
394
394
|
|
|
395
395
|
cta: {
|
|
396
|
-
path: "groups.main.
|
|
396
|
+
path: "groups.main.links[0]",
|
|
397
397
|
required: false
|
|
398
398
|
}
|
|
399
399
|
};
|
|
@@ -412,15 +412,15 @@ const data = accessor.extractBySchema(parsed, schema);
|
|
|
412
412
|
Extract data from array of items:
|
|
413
413
|
|
|
414
414
|
```js
|
|
415
|
-
// Simple: extract single field from each item
|
|
416
|
-
const titles = accessor.mapArray(parsed, "groups.items", "
|
|
415
|
+
// Simple: extract single field from each item (flat structure)
|
|
416
|
+
const titles = accessor.mapArray(parsed, "groups.items", "title");
|
|
417
417
|
// ["Item 1", "Item 2", "Item 3"]
|
|
418
418
|
|
|
419
419
|
// Complex: extract multiple fields from each item
|
|
420
420
|
const cards = accessor.mapArray(parsed, "groups.items", {
|
|
421
|
-
title: "
|
|
422
|
-
text: { path: "
|
|
423
|
-
image: { path: "
|
|
421
|
+
title: "title",
|
|
422
|
+
text: { path: "paragraphs", transform: p => p.join(" ") },
|
|
423
|
+
image: { path: "imgs[0].url", defaultValue: "/default.jpg" }
|
|
424
424
|
});
|
|
425
425
|
// [
|
|
426
426
|
// { title: "...", text: "...", image: "..." },
|
|
@@ -436,11 +436,11 @@ if (accessor.hasPath(parsed, "groups.main.banner.url")) {
|
|
|
436
436
|
// Banner exists
|
|
437
437
|
}
|
|
438
438
|
|
|
439
|
-
// Get first existing path
|
|
439
|
+
// Get first existing path (flat structure)
|
|
440
440
|
const image = accessor.getFirstExisting(parsed, [
|
|
441
441
|
"groups.main.banner.url",
|
|
442
|
-
"groups.main.
|
|
443
|
-
"groups.items[0].
|
|
442
|
+
"groups.main.imgs[0].url",
|
|
443
|
+
"groups.items[0].imgs[0].url"
|
|
444
444
|
], "/fallback.jpg");
|
|
445
445
|
```
|
|
446
446
|
|
|
@@ -640,8 +640,8 @@ const enhancedData = {
|
|
|
640
640
|
relatedPosts: helpers.transformArray(
|
|
641
641
|
accessor.getByPath(parsed, "groups.items", { defaultValue: [] }),
|
|
642
642
|
item => ({
|
|
643
|
-
title: item.
|
|
644
|
-
link: helpers.first(item.
|
|
643
|
+
title: item.title,
|
|
644
|
+
link: helpers.first(item.links)
|
|
645
645
|
})
|
|
646
646
|
),
|
|
647
647
|
|
|
@@ -662,13 +662,13 @@ const componentSchema = {
|
|
|
662
662
|
content: {
|
|
663
663
|
type: "hero", // Use pre-built extractor
|
|
664
664
|
// OR
|
|
665
|
-
mapping: { // Use custom mapping
|
|
666
|
-
brand: "groups.main.
|
|
667
|
-
title: "groups.main.
|
|
668
|
-
subtitle: "groups.main.
|
|
669
|
-
image: { path: "groups.main.
|
|
665
|
+
mapping: { // Use custom mapping (flat paths)
|
|
666
|
+
brand: "groups.main.pretitle",
|
|
667
|
+
title: "groups.main.title",
|
|
668
|
+
subtitle: "groups.main.subtitle",
|
|
669
|
+
image: { path: "groups.main.imgs[0].url", defaultValue: "/default.jpg" },
|
|
670
670
|
actions: {
|
|
671
|
-
path: "groups.main.
|
|
671
|
+
path: "groups.main.links",
|
|
672
672
|
transform: links => links.map(l => ({ label: l.label, type: "primary" }))
|
|
673
673
|
}
|
|
674
674
|
}
|
|
@@ -274,9 +274,9 @@ function Card({ data }) {
|
|
|
274
274
|
import { getByPath, extractBySchema } from '@uniweb/semantic-parser/mappers/accessor';
|
|
275
275
|
|
|
276
276
|
const schema = {
|
|
277
|
-
title: { path: 'groups.main.
|
|
278
|
-
subtitle: { path: 'groups.main.
|
|
279
|
-
content: { path: 'groups.main.
|
|
277
|
+
title: { path: 'groups.main.title' },
|
|
278
|
+
subtitle: { path: 'groups.main.subtitle' },
|
|
279
|
+
content: { path: 'groups.main.paragraphs' }
|
|
280
280
|
};
|
|
281
281
|
|
|
282
282
|
const data = extractBySchema(parsed, schema);
|
package/package.json
CHANGED
package/src/index.js
CHANGED
|
@@ -1,6 +1,5 @@
|
|
|
1
1
|
import { processSequence } from "./processors/sequence.js";
|
|
2
2
|
import { processGroups } from "./processors/groups.js";
|
|
3
|
-
import { processByType } from "./processors/byType.js";
|
|
4
3
|
import * as mappers from "./mappers/index.js";
|
|
5
4
|
|
|
6
5
|
/**
|
|
@@ -8,7 +7,7 @@ import * as mappers from "./mappers/index.js";
|
|
|
8
7
|
* @param {Object} doc - ProseMirror document
|
|
9
8
|
* @param {Object} options - Parsing options
|
|
10
9
|
* @param {boolean} options.parseCodeAsJson - Parse code blocks as JSON. Default: false
|
|
11
|
-
* @returns {Object}
|
|
10
|
+
* @returns {Object} Flat content structure with sequence for ordered access
|
|
12
11
|
*/
|
|
13
12
|
function parseContent(doc, options = {}) {
|
|
14
13
|
// Default options
|
|
@@ -17,18 +16,17 @@ function parseContent(doc, options = {}) {
|
|
|
17
16
|
...options,
|
|
18
17
|
};
|
|
19
18
|
|
|
20
|
-
// Process
|
|
19
|
+
// Process sequence (ordered elements)
|
|
21
20
|
const sequence = processSequence(doc, opts);
|
|
22
21
|
|
|
22
|
+
// Process groups (semantic structure) - returns flat object
|
|
23
23
|
const groups = processGroups(sequence, opts);
|
|
24
24
|
|
|
25
|
-
|
|
26
|
-
|
|
25
|
+
// Return flat structure with sequence at top level
|
|
27
26
|
return {
|
|
28
27
|
raw: doc,
|
|
29
28
|
sequence,
|
|
30
|
-
groups,
|
|
31
|
-
byType,
|
|
29
|
+
...groups, // Spread flat content: title, paragraphs, items, etc.
|
|
32
30
|
};
|
|
33
31
|
}
|
|
34
32
|
|