@uniweb/semantic-parser 1.0.7 → 1.0.9
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/AGENTS.md +136 -0
- package/README.md +52 -104
- package/docs/api.md +38 -40
- package/docs/mapping-patterns.md +47 -47
- package/docs/text-component-reference.md +3 -3
- package/package.json +1 -1
- package/src/index.js +5 -7
- package/src/mappers/extractors.js +113 -120
- package/src/processors/groups.js +96 -25
- package/src/processors/byType.js +0 -130
|
@@ -1,5 +1,9 @@
|
|
|
1
1
|
/**
|
|
2
2
|
* Pre-built extractors for common component patterns
|
|
3
|
+
*
|
|
4
|
+
* All extractors work with the flat content structure:
|
|
5
|
+
* - Root level: title, pretitle, subtitle, paragraphs, links, imgs, items, etc.
|
|
6
|
+
* - Items array: each item has flat structure (title, paragraphs, etc.)
|
|
3
7
|
*/
|
|
4
8
|
|
|
5
9
|
import { first, joinParagraphs } from "./helpers.js";
|
|
@@ -12,18 +16,16 @@ import { first, joinParagraphs } from "./helpers.js";
|
|
|
12
16
|
* @returns {Object} Hero component data
|
|
13
17
|
*/
|
|
14
18
|
function hero(parsed) {
|
|
15
|
-
const main = parsed.groups?.main;
|
|
16
|
-
|
|
17
19
|
return {
|
|
18
|
-
title:
|
|
19
|
-
subtitle:
|
|
20
|
-
kicker:
|
|
21
|
-
description:
|
|
22
|
-
image: first(
|
|
23
|
-
imageAlt: first(
|
|
24
|
-
banner:
|
|
25
|
-
cta: first(
|
|
26
|
-
button: first(
|
|
20
|
+
title: parsed?.title || null,
|
|
21
|
+
subtitle: parsed?.subtitle || null,
|
|
22
|
+
kicker: parsed?.pretitle || null,
|
|
23
|
+
description: parsed?.paragraphs || [],
|
|
24
|
+
image: first(parsed?.imgs)?.url || null,
|
|
25
|
+
imageAlt: first(parsed?.imgs)?.alt || null,
|
|
26
|
+
banner: null, // Banner detection would need to be added separately
|
|
27
|
+
cta: first(parsed?.links) || null,
|
|
28
|
+
button: first(parsed?.buttons) || null,
|
|
27
29
|
};
|
|
28
30
|
}
|
|
29
31
|
|
|
@@ -40,30 +42,30 @@ function hero(parsed) {
|
|
|
40
42
|
function card(parsed, options = {}) {
|
|
41
43
|
const { useItems = false, itemIndex } = options;
|
|
42
44
|
|
|
43
|
-
const extractCard = (
|
|
44
|
-
if (!
|
|
45
|
+
const extractCard = (content) => {
|
|
46
|
+
if (!content) return null;
|
|
45
47
|
|
|
46
48
|
return {
|
|
47
|
-
title:
|
|
48
|
-
subtitle:
|
|
49
|
-
description:
|
|
50
|
-
image: first(
|
|
51
|
-
imageAlt: first(
|
|
52
|
-
icon: first(
|
|
53
|
-
link: first(
|
|
54
|
-
button: first(
|
|
49
|
+
title: content.title || null,
|
|
50
|
+
subtitle: content.subtitle || null,
|
|
51
|
+
description: content.paragraphs || [],
|
|
52
|
+
image: first(content.imgs)?.url || null,
|
|
53
|
+
imageAlt: first(content.imgs)?.alt || null,
|
|
54
|
+
icon: first(content.icons) || null,
|
|
55
|
+
link: first(content.links) || null,
|
|
56
|
+
button: first(content.buttons) || null,
|
|
55
57
|
};
|
|
56
58
|
};
|
|
57
59
|
|
|
58
60
|
if (useItems) {
|
|
59
|
-
const items = parsed
|
|
61
|
+
const items = parsed?.items || [];
|
|
60
62
|
if (itemIndex !== undefined) {
|
|
61
63
|
return extractCard(items[itemIndex]);
|
|
62
64
|
}
|
|
63
65
|
return items.map(extractCard).filter(Boolean);
|
|
64
66
|
}
|
|
65
67
|
|
|
66
|
-
return extractCard(parsed
|
|
68
|
+
return extractCard(parsed);
|
|
67
69
|
}
|
|
68
70
|
|
|
69
71
|
/**
|
|
@@ -74,19 +76,17 @@ function card(parsed, options = {}) {
|
|
|
74
76
|
* @returns {Object} Article data
|
|
75
77
|
*/
|
|
76
78
|
function article(parsed) {
|
|
77
|
-
const main = parsed.groups?.main;
|
|
78
|
-
|
|
79
79
|
return {
|
|
80
|
-
title:
|
|
81
|
-
subtitle:
|
|
82
|
-
kicker:
|
|
83
|
-
author:
|
|
84
|
-
date:
|
|
85
|
-
banner:
|
|
86
|
-
content:
|
|
87
|
-
images:
|
|
88
|
-
videos:
|
|
89
|
-
links:
|
|
80
|
+
title: parsed?.title || null,
|
|
81
|
+
subtitle: parsed?.subtitle || null,
|
|
82
|
+
kicker: parsed?.pretitle || null,
|
|
83
|
+
author: null, // Would need metadata support
|
|
84
|
+
date: null, // Would need metadata support
|
|
85
|
+
banner: null, // Banner detection would need to be added separately
|
|
86
|
+
content: parsed?.paragraphs || [],
|
|
87
|
+
images: parsed?.imgs || [],
|
|
88
|
+
videos: parsed?.videos || [],
|
|
89
|
+
links: parsed?.links || [],
|
|
90
90
|
};
|
|
91
91
|
}
|
|
92
92
|
|
|
@@ -98,14 +98,13 @@ function article(parsed) {
|
|
|
98
98
|
* @returns {Array} Array of stat objects
|
|
99
99
|
*/
|
|
100
100
|
function stats(parsed) {
|
|
101
|
-
const items = parsed
|
|
101
|
+
const items = parsed?.items || [];
|
|
102
102
|
|
|
103
103
|
return items
|
|
104
104
|
.map((item) => ({
|
|
105
|
-
value: item.
|
|
106
|
-
label:
|
|
107
|
-
|
|
108
|
-
description: item.body?.paragraphs || [],
|
|
105
|
+
value: item.title || null,
|
|
106
|
+
label: item.subtitle || first(item.paragraphs) || null,
|
|
107
|
+
description: item.paragraphs || [],
|
|
109
108
|
}))
|
|
110
109
|
.filter((stat) => stat.value);
|
|
111
110
|
}
|
|
@@ -118,17 +117,17 @@ function stats(parsed) {
|
|
|
118
117
|
* @returns {Array} Navigation items
|
|
119
118
|
*/
|
|
120
119
|
function navigation(parsed) {
|
|
121
|
-
const items = parsed
|
|
120
|
+
const items = parsed?.items || [];
|
|
122
121
|
|
|
123
122
|
return items
|
|
124
123
|
.map((item) => {
|
|
125
124
|
const navItem = {
|
|
126
|
-
label: item.
|
|
127
|
-
href: first(item.
|
|
125
|
+
label: item.title || null,
|
|
126
|
+
href: first(item.links)?.href || null,
|
|
128
127
|
};
|
|
129
128
|
|
|
130
129
|
// Extract children from nested lists
|
|
131
|
-
const firstList = first(item.
|
|
130
|
+
const firstList = first(item.lists);
|
|
132
131
|
if (firstList && firstList.length > 0) {
|
|
133
132
|
navItem.children = firstList
|
|
134
133
|
.map((listItem) => ({
|
|
@@ -152,16 +151,16 @@ function navigation(parsed) {
|
|
|
152
151
|
* @returns {Array} Feature items
|
|
153
152
|
*/
|
|
154
153
|
function features(parsed) {
|
|
155
|
-
const items = parsed
|
|
154
|
+
const items = parsed?.items || [];
|
|
156
155
|
|
|
157
156
|
return items
|
|
158
157
|
.map((item) => ({
|
|
159
|
-
title: item.
|
|
160
|
-
subtitle: item.
|
|
161
|
-
description: item.
|
|
162
|
-
icon: first(item.
|
|
163
|
-
image: first(item.
|
|
164
|
-
link: first(item.
|
|
158
|
+
title: item.title || null,
|
|
159
|
+
subtitle: item.subtitle || null,
|
|
160
|
+
description: item.paragraphs || [],
|
|
161
|
+
icon: first(item.icons) || null,
|
|
162
|
+
image: first(item.imgs)?.url || null,
|
|
163
|
+
link: first(item.links) || null,
|
|
165
164
|
}))
|
|
166
165
|
.filter((feature) => feature.title);
|
|
167
166
|
}
|
|
@@ -178,25 +177,25 @@ function features(parsed) {
|
|
|
178
177
|
function testimonial(parsed, options = {}) {
|
|
179
178
|
const { useItems = false } = options;
|
|
180
179
|
|
|
181
|
-
const extractTestimonial = (
|
|
182
|
-
if (!
|
|
180
|
+
const extractTestimonial = (content) => {
|
|
181
|
+
if (!content) return null;
|
|
183
182
|
|
|
184
183
|
return {
|
|
185
|
-
quote:
|
|
186
|
-
author:
|
|
187
|
-
role:
|
|
188
|
-
company:
|
|
189
|
-
image: first(
|
|
190
|
-
imageAlt: first(
|
|
184
|
+
quote: content.paragraphs || [],
|
|
185
|
+
author: content.title || null,
|
|
186
|
+
role: content.subtitle || null,
|
|
187
|
+
company: content.pretitle || null,
|
|
188
|
+
image: first(content.imgs)?.url || null,
|
|
189
|
+
imageAlt: first(content.imgs)?.alt || null,
|
|
191
190
|
};
|
|
192
191
|
};
|
|
193
192
|
|
|
194
193
|
if (useItems) {
|
|
195
|
-
const items = parsed
|
|
194
|
+
const items = parsed?.items || [];
|
|
196
195
|
return items.map(extractTestimonial).filter(Boolean);
|
|
197
196
|
}
|
|
198
197
|
|
|
199
|
-
return extractTestimonial(parsed
|
|
198
|
+
return extractTestimonial(parsed);
|
|
200
199
|
}
|
|
201
200
|
|
|
202
201
|
/**
|
|
@@ -207,13 +206,13 @@ function testimonial(parsed, options = {}) {
|
|
|
207
206
|
* @returns {Array} FAQ items
|
|
208
207
|
*/
|
|
209
208
|
function faq(parsed) {
|
|
210
|
-
const items = parsed
|
|
209
|
+
const items = parsed?.items || [];
|
|
211
210
|
|
|
212
211
|
return items
|
|
213
212
|
.map((item) => ({
|
|
214
|
-
question: item.
|
|
215
|
-
answer: item.
|
|
216
|
-
links: item.
|
|
213
|
+
question: item.title || null,
|
|
214
|
+
answer: item.paragraphs || [],
|
|
215
|
+
links: item.links || [],
|
|
217
216
|
}))
|
|
218
217
|
.filter((item) => item.question);
|
|
219
218
|
}
|
|
@@ -226,16 +225,16 @@ function faq(parsed) {
|
|
|
226
225
|
* @returns {Array} Pricing tiers
|
|
227
226
|
*/
|
|
228
227
|
function pricing(parsed) {
|
|
229
|
-
const items = parsed
|
|
228
|
+
const items = parsed?.items || [];
|
|
230
229
|
|
|
231
230
|
return items
|
|
232
231
|
.map((item) => {
|
|
233
|
-
const firstList = first(item.
|
|
232
|
+
const firstList = first(item.lists);
|
|
234
233
|
|
|
235
234
|
return {
|
|
236
|
-
name: item.
|
|
237
|
-
price: item.
|
|
238
|
-
description: first(item.
|
|
235
|
+
name: item.title || null,
|
|
236
|
+
price: item.subtitle || null,
|
|
237
|
+
description: first(item.paragraphs) || null,
|
|
239
238
|
features: firstList
|
|
240
239
|
? firstList
|
|
241
240
|
.map((listItem) =>
|
|
@@ -243,13 +242,9 @@ function pricing(parsed) {
|
|
|
243
242
|
)
|
|
244
243
|
.filter(Boolean)
|
|
245
244
|
: [],
|
|
246
|
-
cta:
|
|
247
|
-
first(item.body?.links) ||
|
|
248
|
-
first(item.body?.buttons) ||
|
|
249
|
-
null,
|
|
245
|
+
cta: first(item.links) || first(item.buttons) || null,
|
|
250
246
|
highlighted:
|
|
251
|
-
item.
|
|
252
|
-
false,
|
|
247
|
+
item.pretitle?.toLowerCase().includes("popular") || false,
|
|
253
248
|
};
|
|
254
249
|
})
|
|
255
250
|
.filter((tier) => tier.name);
|
|
@@ -263,17 +258,17 @@ function pricing(parsed) {
|
|
|
263
258
|
* @returns {Array} Team members
|
|
264
259
|
*/
|
|
265
260
|
function team(parsed) {
|
|
266
|
-
const items = parsed
|
|
261
|
+
const items = parsed?.items || [];
|
|
267
262
|
|
|
268
263
|
return items
|
|
269
264
|
.map((item) => ({
|
|
270
|
-
name: item.
|
|
271
|
-
role: item.
|
|
272
|
-
department: item.
|
|
273
|
-
bio: item.
|
|
274
|
-
image: first(item.
|
|
275
|
-
imageAlt: first(item.
|
|
276
|
-
links: item.
|
|
265
|
+
name: item.title || null,
|
|
266
|
+
role: item.subtitle || null,
|
|
267
|
+
department: item.pretitle || null,
|
|
268
|
+
bio: item.paragraphs || [],
|
|
269
|
+
image: first(item.imgs)?.url || null,
|
|
270
|
+
imageAlt: first(item.imgs)?.alt || null,
|
|
271
|
+
links: item.links || [],
|
|
277
272
|
}))
|
|
278
273
|
.filter((member) => member.name);
|
|
279
274
|
}
|
|
@@ -292,14 +287,14 @@ function gallery(parsed, options = {}) {
|
|
|
292
287
|
const images = [];
|
|
293
288
|
|
|
294
289
|
if (source === "main" || source === "all") {
|
|
295
|
-
const mainImages = parsed
|
|
290
|
+
const mainImages = parsed?.imgs || [];
|
|
296
291
|
images.push(...mainImages);
|
|
297
292
|
}
|
|
298
293
|
|
|
299
294
|
if (source === "items" || source === "all") {
|
|
300
|
-
const items = parsed
|
|
295
|
+
const items = parsed?.items || [];
|
|
301
296
|
items.forEach((item) => {
|
|
302
|
-
const itemImages = item.
|
|
297
|
+
const itemImages = item.imgs || [];
|
|
303
298
|
images.push(...itemImages);
|
|
304
299
|
});
|
|
305
300
|
}
|
|
@@ -315,26 +310,24 @@ function gallery(parsed, options = {}) {
|
|
|
315
310
|
* Extract content in legacy Article class format
|
|
316
311
|
* Used for backward compatibility with existing components
|
|
317
312
|
*
|
|
318
|
-
* This extractor transforms the new parser output into the
|
|
313
|
+
* This extractor transforms the new flat parser output into the nested format
|
|
319
314
|
* used by the legacy Article class, enabling drop-in replacement without
|
|
320
315
|
* breaking existing components.
|
|
321
316
|
*
|
|
322
|
-
* @param {Object} parsed - Parsed content from parseContent()
|
|
323
|
-
* @returns {Object} Legacy format { main, items }
|
|
317
|
+
* @param {Object} parsed - Parsed content from parseContent() (flat structure)
|
|
318
|
+
* @returns {Object} Legacy format { main, items } with nested header/body structure
|
|
324
319
|
*
|
|
325
320
|
* @example
|
|
326
321
|
* const { parseContent, mappers } = require('@uniweb/semantic-parser');
|
|
327
|
-
* const parsed = parseContent(doc
|
|
322
|
+
* const parsed = parseContent(doc);
|
|
328
323
|
* const legacy = mappers.extractors.legacy(parsed);
|
|
329
|
-
* // Returns: { main: {...}, items: [...] }
|
|
324
|
+
* // Returns: { main: { header: {...}, body: {...} }, items: [...] }
|
|
330
325
|
*/
|
|
331
326
|
function legacy(parsed) {
|
|
332
|
-
const
|
|
333
|
-
|
|
334
|
-
const transformGroup = (group) => {
|
|
335
|
-
if (!group) return null;
|
|
327
|
+
const transformToNested = (content) => {
|
|
328
|
+
if (!content) return null;
|
|
336
329
|
|
|
337
|
-
let imgs =
|
|
330
|
+
let imgs = content.imgs || [];
|
|
338
331
|
// Pick the first image explicitly tagged as the banner.
// The comparison must be `===`: an assignment here would overwrite the
// `role` of every image with "banner" and always match the first image.
// `let` is kept because the statements that follow are not visible in this
// view and may reassign `banner` — TODO confirm.
let banner = imgs.find((item) => item.role === "banner");
|
|
@@ -343,41 +336,41 @@ function legacy(parsed) {
|
|
|
343
336
|
|
|
344
337
|
return {
|
|
345
338
|
header: {
|
|
346
|
-
title:
|
|
347
|
-
subtitle:
|
|
348
|
-
subtitle2:
|
|
349
|
-
pretitle:
|
|
339
|
+
title: content.title || "",
|
|
340
|
+
subtitle: content.subtitle || "",
|
|
341
|
+
subtitle2: content.subtitle2 || "",
|
|
342
|
+
pretitle: content.pretitle || "",
|
|
350
343
|
// Auto-fill description (legacy behavior)
|
|
351
344
|
description:
|
|
352
|
-
|
|
353
|
-
first(
|
|
345
|
+
content.subtitle2 ||
|
|
346
|
+
first(content.paragraphs) ||
|
|
354
347
|
"",
|
|
355
|
-
alignment:
|
|
348
|
+
alignment: content.alignment || "",
|
|
356
349
|
},
|
|
357
350
|
banner,
|
|
358
351
|
body: {
|
|
359
|
-
paragraphs:
|
|
360
|
-
headings:
|
|
352
|
+
paragraphs: content.paragraphs || [],
|
|
353
|
+
headings: content.headings || [],
|
|
361
354
|
imgs,
|
|
362
|
-
videos:
|
|
363
|
-
lists:
|
|
364
|
-
links:
|
|
365
|
-
icons:
|
|
366
|
-
buttons:
|
|
367
|
-
cards:
|
|
368
|
-
documents:
|
|
369
|
-
forms:
|
|
370
|
-
form: first(
|
|
371
|
-
quotes:
|
|
372
|
-
properties:
|
|
373
|
-
propertyBlocks:
|
|
355
|
+
videos: content.videos || [],
|
|
356
|
+
lists: content.lists || [],
|
|
357
|
+
links: content.links || [],
|
|
358
|
+
icons: content.icons || [],
|
|
359
|
+
buttons: content.buttons || [],
|
|
360
|
+
cards: content.cards || [],
|
|
361
|
+
documents: content.documents || [],
|
|
362
|
+
forms: content.forms || [],
|
|
363
|
+
form: first(content.forms) || null,
|
|
364
|
+
quotes: content.quotes || [],
|
|
365
|
+
properties: content.properties || {},
|
|
366
|
+
propertyBlocks: content.propertyBlocks || [],
|
|
374
367
|
},
|
|
375
368
|
};
|
|
376
369
|
};
|
|
377
370
|
|
|
378
371
|
return {
|
|
379
|
-
main:
|
|
380
|
-
items: (
|
|
372
|
+
main: transformToNested(parsed),
|
|
373
|
+
items: (parsed?.items || []).map(transformToNested),
|
|
381
374
|
};
|
|
382
375
|
}
|
|
383
376
|
|
package/src/processors/groups.js
CHANGED
|
@@ -1,42 +1,113 @@
|
|
|
1
|
+
/**
 * Flatten a group's nested structure to a flat object.
 *
 * Header fields (title, pretitle, subtitle, subtitle2, alignment) and body
 * collections are copied onto a single level. Missing or falsy values fall
 * back to an empty default ('' for header text, null for alignment, {} for
 * properties, [] for every other body collection).
 *
 * A group without a `header` or `body` is tolerated and simply yields the
 * defaults for that half, rather than throwing.
 *
 * @param {Object} group Processed group with { header, body, metadata }
 * @returns {Object|null} Flat content object, or null when `group` is falsy
 */
function flattenGroup(group) {
  if (!group) return null;

  // Guard against partially-built groups so property reads never throw.
  const header = group.header ?? {};
  const body = group.body ?? {};

  return {
    title: header.title || '',
    pretitle: header.pretitle || '',
    subtitle: header.subtitle || '',
    subtitle2: header.subtitle2 || '',
    alignment: header.alignment || null,
    paragraphs: body.paragraphs || [],
    links: body.links || [],
    imgs: body.imgs || [],
    icons: body.icons || [],
    lists: body.lists || [],
    videos: body.videos || [],
    buttons: body.buttons || [],
    properties: body.properties || {},
    propertyBlocks: body.propertyBlocks || [],
    cards: body.cards || [],
    documents: body.documents || [],
    forms: body.forms || [],
    quotes: body.quotes || [],
    headings: body.headings || [],
  };
}
|
|
30
|
+
|
|
1
31
|
/**
 * Build a fresh, empty flat content object (without the `items` key).
 * A new object with new arrays is created on every call so that callers
 * never share the default collections.
 * @returns {Object} Flat content object holding only empty defaults
 */
function emptyFlatContent() {
  return {
    title: '',
    pretitle: '',
    subtitle: '',
    subtitle2: '',
    alignment: null,
    paragraphs: [],
    links: [],
    imgs: [],
    icons: [],
    lists: [],
    videos: [],
    buttons: [],
    properties: {},
    propertyBlocks: [],
    cards: [],
    documents: [],
    forms: [],
    quotes: [],
    headings: [],
  };
}

/**
 * Transform a sequence into content groups with semantic structure
 *
 * The sequence is split into groups, each group is processed, and the
 * result is returned as one flat object: the main group's fields at the
 * root plus an `items` array holding the flattened remaining groups.
 * When no main group is identified, the root fields are empty defaults and
 * every group becomes an item.
 *
 * @param {Array} sequence Flat sequence of elements
 * @param {Object} options Parsing options
 * @returns {Object} Flat content object with items array
 */
function processGroups(sequence, options = {}) {
  // Empty (or missing) content returns the flat empty structure
  if (!sequence?.length) {
    return { ...emptyFlatContent(), items: [] };
  }

  const groups = splitBySlices(sequence);

  // Process each group's structure (still nested internally)
  const processedGroups = groups.map((group) => processGroupContent(group));

  // Determine main vs items: when the first group qualifies as main content
  // it is lifted to the root, otherwise every group is treated as an item.
  const hasMain = identifyMainContent(processedGroups);
  const mainGroup = hasMain ? processedGroups[0] : null;
  const itemGroups = hasMain ? processedGroups.slice(1) : processedGroups;

  // Flatten main content (or fall back to the empty flat structure)
  const flatMain = flattenGroup(mainGroup) || emptyFlatContent();

  return {
    ...flatMain,
    items: itemGroups.map(flattenGroup),
  };
}
|
|
41
112
|
|
|
42
113
|
function splitBySlices(sequence) {
|
package/src/processors/byType.js
DELETED
|
@@ -1,130 +0,0 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* Organize content elements by their type while preserving context
|
|
3
|
-
* @param {Array} sequence Flat sequence of elements
|
|
4
|
-
* @returns {Object} Content organized by type
|
|
5
|
-
*/
|
|
6
|
-
function processByType(sequence) {
|
|
7
|
-
const collections = {
|
|
8
|
-
headings: [],
|
|
9
|
-
paragraphs: [],
|
|
10
|
-
images: {
|
|
11
|
-
background: [],
|
|
12
|
-
content: [],
|
|
13
|
-
gallery: [],
|
|
14
|
-
icon: [],
|
|
15
|
-
},
|
|
16
|
-
lists: [],
|
|
17
|
-
dividers: [],
|
|
18
|
-
metadata: {
|
|
19
|
-
totalElements: sequence.length,
|
|
20
|
-
dominantType: null,
|
|
21
|
-
hasMedia: false,
|
|
22
|
-
},
|
|
23
|
-
};
|
|
24
|
-
|
|
25
|
-
// Track type frequencies for metadata
|
|
26
|
-
const typeFrequency = new Map();
|
|
27
|
-
|
|
28
|
-
sequence.forEach((element, index) => {
|
|
29
|
-
// Track element type frequency
|
|
30
|
-
typeFrequency.set(element.type, (typeFrequency.get(element.type) || 0) + 1);
|
|
31
|
-
|
|
32
|
-
// Add context information
|
|
33
|
-
const context = getElementContext(sequence, index);
|
|
34
|
-
const enrichedElement = { ...element, context };
|
|
35
|
-
|
|
36
|
-
// Process element based on type
|
|
37
|
-
switch (element.type) {
|
|
38
|
-
case "heading":
|
|
39
|
-
collections.headings.push(enrichedElement);
|
|
40
|
-
break;
|
|
41
|
-
|
|
42
|
-
case "paragraph":
|
|
43
|
-
collections.paragraphs.push(enrichedElement);
|
|
44
|
-
break;
|
|
45
|
-
|
|
46
|
-
case "image": {
|
|
47
|
-
// Support both attrs.role and top-level role for backwards compatibility
|
|
48
|
-
const role = element.attrs?.role || element.role || "content";
|
|
49
|
-
if (!collections.images[role]) {
|
|
50
|
-
collections.images[role] = [];
|
|
51
|
-
}
|
|
52
|
-
collections.images[role].push(enrichedElement);
|
|
53
|
-
collections.metadata.hasMedia = true;
|
|
54
|
-
break;
|
|
55
|
-
}
|
|
56
|
-
|
|
57
|
-
case "list":
|
|
58
|
-
collections.lists.push(enrichedElement);
|
|
59
|
-
break;
|
|
60
|
-
|
|
61
|
-
case "divider":
|
|
62
|
-
collections.dividers.push(enrichedElement);
|
|
63
|
-
break;
|
|
64
|
-
}
|
|
65
|
-
});
|
|
66
|
-
|
|
67
|
-
// Calculate dominant type
|
|
68
|
-
let maxFrequency = 0;
|
|
69
|
-
typeFrequency.forEach((frequency, type) => {
|
|
70
|
-
if (frequency > maxFrequency) {
|
|
71
|
-
maxFrequency = frequency;
|
|
72
|
-
collections.metadata.dominantType = type;
|
|
73
|
-
}
|
|
74
|
-
});
|
|
75
|
-
|
|
76
|
-
// Add helper methods
|
|
77
|
-
addCollectionHelpers(collections);
|
|
78
|
-
|
|
79
|
-
return collections;
|
|
80
|
-
}
|
|
81
|
-
|
|
82
|
-
/**
|
|
83
|
-
* Get context information for an element
|
|
84
|
-
*/
|
|
85
|
-
function getElementContext(sequence, position) {
|
|
86
|
-
const context = {
|
|
87
|
-
position,
|
|
88
|
-
previousElement: position > 0 ? sequence[position - 1] : null,
|
|
89
|
-
nextElement: position < sequence.length - 1 ? sequence[position + 1] : null,
|
|
90
|
-
nearestHeading: null,
|
|
91
|
-
};
|
|
92
|
-
|
|
93
|
-
// Find nearest preceding heading
|
|
94
|
-
for (let i = position - 1; i >= 0; i--) {
|
|
95
|
-
if (sequence[i].type === "heading") {
|
|
96
|
-
context.nearestHeading = sequence[i];
|
|
97
|
-
break;
|
|
98
|
-
}
|
|
99
|
-
}
|
|
100
|
-
|
|
101
|
-
return context;
|
|
102
|
-
}
|
|
103
|
-
|
|
104
|
-
/**
|
|
105
|
-
* Add helper methods to collections
|
|
106
|
-
*/
|
|
107
|
-
function addCollectionHelpers(collections) {
|
|
108
|
-
// Get headings of specific level
|
|
109
|
-
collections.getHeadingsByLevel = function (level) {
|
|
110
|
-
return this.headings.filter((h) => h.level === level);
|
|
111
|
-
};
|
|
112
|
-
|
|
113
|
-
// Get elements by heading context
|
|
114
|
-
collections.getElementsByHeadingContext = function (headingFilter) {
|
|
115
|
-
const allElements = [
|
|
116
|
-
...this.paragraphs,
|
|
117
|
-
...Object.values(this.images).flat(),
|
|
118
|
-
...this.lists,
|
|
119
|
-
];
|
|
120
|
-
|
|
121
|
-
return allElements.filter(
|
|
122
|
-
(el) =>
|
|
123
|
-
el.context?.nearestHeading && headingFilter(el.context.nearestHeading)
|
|
124
|
-
);
|
|
125
|
-
};
|
|
126
|
-
}
|
|
127
|
-
|
|
128
|
-
export {
|
|
129
|
-
processByType
|
|
130
|
-
};
|