@uniweb/semantic-parser 1.0.7 → 1.0.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,5 +1,9 @@
1
1
  /**
2
2
  * Pre-built extractors for common component patterns
3
+ *
4
+ * All extractors work with the flat content structure:
5
+ * - Root level: title, pretitle, subtitle, paragraphs, links, imgs, items, etc.
6
+ * - Items array: each item has flat structure (title, paragraphs, etc.)
3
7
  */
4
8
 
5
9
  import { first, joinParagraphs } from "./helpers.js";
@@ -12,18 +16,16 @@ import { first, joinParagraphs } from "./helpers.js";
12
16
  * @returns {Object} Hero component data
13
17
  */
14
18
  function hero(parsed) {
15
- const main = parsed.groups?.main;
16
-
17
19
  return {
18
- title: main?.header?.title || null,
19
- subtitle: main?.header?.subtitle || null,
20
- kicker: main?.header?.pretitle || null,
21
- description: main?.body?.paragraphs || [],
22
- image: first(main?.body?.imgs)?.url || null,
23
- imageAlt: first(main?.body?.imgs)?.alt || null,
24
- banner: main?.banner?.url || null,
25
- cta: first(main?.body?.links) || null,
26
- button: first(main?.body?.buttons) || null,
20
+ title: parsed?.title || null,
21
+ subtitle: parsed?.subtitle || null,
22
+ kicker: parsed?.pretitle || null,
23
+ description: parsed?.paragraphs || [],
24
+ image: first(parsed?.imgs)?.url || null,
25
+ imageAlt: first(parsed?.imgs)?.alt || null,
26
+ banner: null, // Banner detection would need to be added separately
27
+ cta: first(parsed?.links) || null,
28
+ button: first(parsed?.buttons) || null,
27
29
  };
28
30
  }
29
31
 
@@ -40,30 +42,30 @@ function hero(parsed) {
40
42
  function card(parsed, options = {}) {
41
43
  const { useItems = false, itemIndex } = options;
42
44
 
43
- const extractCard = (group) => {
44
- if (!group) return null;
45
+ const extractCard = (content) => {
46
+ if (!content) return null;
45
47
 
46
48
  return {
47
- title: group.header?.title || null,
48
- subtitle: group.header?.subtitle || null,
49
- description: group.body?.paragraphs || [],
50
- image: first(group.body?.imgs)?.url || null,
51
- imageAlt: first(group.body?.imgs)?.alt || null,
52
- icon: first(group.body?.icons) || null,
53
- link: first(group.body?.links) || null,
54
- button: first(group.body?.buttons) || null,
49
+ title: content.title || null,
50
+ subtitle: content.subtitle || null,
51
+ description: content.paragraphs || [],
52
+ image: first(content.imgs)?.url || null,
53
+ imageAlt: first(content.imgs)?.alt || null,
54
+ icon: first(content.icons) || null,
55
+ link: first(content.links) || null,
56
+ button: first(content.buttons) || null,
55
57
  };
56
58
  };
57
59
 
58
60
  if (useItems) {
59
- const items = parsed.groups?.items || [];
61
+ const items = parsed?.items || [];
60
62
  if (itemIndex !== undefined) {
61
63
  return extractCard(items[itemIndex]);
62
64
  }
63
65
  return items.map(extractCard).filter(Boolean);
64
66
  }
65
67
 
66
- return extractCard(parsed.groups?.main);
68
+ return extractCard(parsed);
67
69
  }
68
70
 
69
71
  /**
@@ -74,19 +76,17 @@ function card(parsed, options = {}) {
74
76
  * @returns {Object} Article data
75
77
  */
76
78
  function article(parsed) {
77
- const main = parsed.groups?.main;
78
-
79
79
  return {
80
- title: main?.header?.title || null,
81
- subtitle: main?.header?.subtitle || null,
82
- kicker: main?.header?.pretitle || null,
83
- author: main?.metadata?.author || null,
84
- date: main?.metadata?.date || null,
85
- banner: main?.banner?.url || null,
86
- content: main?.body?.paragraphs || [],
87
- images: main?.body?.imgs || [],
88
- videos: main?.body?.videos || [],
89
- links: main?.body?.links || [],
80
+ title: parsed?.title || null,
81
+ subtitle: parsed?.subtitle || null,
82
+ kicker: parsed?.pretitle || null,
83
+ author: null, // Would need metadata support
84
+ date: null, // Would need metadata support
85
+ banner: null, // Banner detection would need to be added separately
86
+ content: parsed?.paragraphs || [],
87
+ images: parsed?.imgs || [],
88
+ videos: parsed?.videos || [],
89
+ links: parsed?.links || [],
90
90
  };
91
91
  }
92
92
 
@@ -98,14 +98,13 @@ function article(parsed) {
98
98
  * @returns {Array} Array of stat objects
99
99
  */
100
100
  function stats(parsed) {
101
- const items = parsed.groups?.items || [];
101
+ const items = parsed?.items || [];
102
102
 
103
103
  return items
104
104
  .map((item) => ({
105
- value: item.header?.title || null,
106
- label:
107
- item.header?.subtitle || first(item.body?.paragraphs) || null,
108
- description: item.body?.paragraphs || [],
105
+ value: item.title || null,
106
+ label: item.subtitle || first(item.paragraphs) || null,
107
+ description: item.paragraphs || [],
109
108
  }))
110
109
  .filter((stat) => stat.value);
111
110
  }
@@ -118,17 +117,17 @@ function stats(parsed) {
118
117
  * @returns {Array} Navigation items
119
118
  */
120
119
  function navigation(parsed) {
121
- const items = parsed.groups?.items || [];
120
+ const items = parsed?.items || [];
122
121
 
123
122
  return items
124
123
  .map((item) => {
125
124
  const navItem = {
126
- label: item.header?.title || null,
127
- href: first(item.body?.links)?.href || null,
125
+ label: item.title || null,
126
+ href: first(item.links)?.href || null,
128
127
  };
129
128
 
130
129
  // Extract children from nested lists
131
- const firstList = first(item.body?.lists);
130
+ const firstList = first(item.lists);
132
131
  if (firstList && firstList.length > 0) {
133
132
  navItem.children = firstList
134
133
  .map((listItem) => ({
@@ -152,16 +151,16 @@ function navigation(parsed) {
152
151
  * @returns {Array} Feature items
153
152
  */
154
153
  function features(parsed) {
155
- const items = parsed.groups?.items || [];
154
+ const items = parsed?.items || [];
156
155
 
157
156
  return items
158
157
  .map((item) => ({
159
- title: item.header?.title || null,
160
- subtitle: item.header?.subtitle || null,
161
- description: item.body?.paragraphs || [],
162
- icon: first(item.body?.icons) || null,
163
- image: first(item.body?.imgs)?.url || null,
164
- link: first(item.body?.links) || null,
158
+ title: item.title || null,
159
+ subtitle: item.subtitle || null,
160
+ description: item.paragraphs || [],
161
+ icon: first(item.icons) || null,
162
+ image: first(item.imgs)?.url || null,
163
+ link: first(item.links) || null,
165
164
  }))
166
165
  .filter((feature) => feature.title);
167
166
  }
@@ -178,25 +177,25 @@ function features(parsed) {
178
177
  function testimonial(parsed, options = {}) {
179
178
  const { useItems = false } = options;
180
179
 
181
- const extractTestimonial = (group) => {
182
- if (!group) return null;
180
+ const extractTestimonial = (content) => {
181
+ if (!content) return null;
183
182
 
184
183
  return {
185
- quote: group.body?.paragraphs || [],
186
- author: group.header?.title || null,
187
- role: group.header?.subtitle || null,
188
- company: group.header?.pretitle || null,
189
- image: first(group.body?.imgs)?.url || null,
190
- imageAlt: first(group.body?.imgs)?.alt || null,
184
+ quote: content.paragraphs || [],
185
+ author: content.title || null,
186
+ role: content.subtitle || null,
187
+ company: content.pretitle || null,
188
+ image: first(content.imgs)?.url || null,
189
+ imageAlt: first(content.imgs)?.alt || null,
191
190
  };
192
191
  };
193
192
 
194
193
  if (useItems) {
195
- const items = parsed.groups?.items || [];
194
+ const items = parsed?.items || [];
196
195
  return items.map(extractTestimonial).filter(Boolean);
197
196
  }
198
197
 
199
- return extractTestimonial(parsed.groups?.main);
198
+ return extractTestimonial(parsed);
200
199
  }
201
200
 
202
201
  /**
@@ -207,13 +206,13 @@ function testimonial(parsed, options = {}) {
207
206
  * @returns {Array} FAQ items
208
207
  */
209
208
  function faq(parsed) {
210
- const items = parsed.groups?.items || [];
209
+ const items = parsed?.items || [];
211
210
 
212
211
  return items
213
212
  .map((item) => ({
214
- question: item.header?.title || null,
215
- answer: item.body?.paragraphs || [],
216
- links: item.body?.links || [],
213
+ question: item.title || null,
214
+ answer: item.paragraphs || [],
215
+ links: item.links || [],
217
216
  }))
218
217
  .filter((item) => item.question);
219
218
  }
@@ -226,16 +225,16 @@ function faq(parsed) {
226
225
  * @returns {Array} Pricing tiers
227
226
  */
228
227
  function pricing(parsed) {
229
- const items = parsed.groups?.items || [];
228
+ const items = parsed?.items || [];
230
229
 
231
230
  return items
232
231
  .map((item) => {
233
- const firstList = first(item.body?.lists);
232
+ const firstList = first(item.lists);
234
233
 
235
234
  return {
236
- name: item.header?.title || null,
237
- price: item.header?.subtitle || null,
238
- description: first(item.body?.paragraphs) || null,
235
+ name: item.title || null,
236
+ price: item.subtitle || null,
237
+ description: first(item.paragraphs) || null,
239
238
  features: firstList
240
239
  ? firstList
241
240
  .map((listItem) =>
@@ -243,13 +242,9 @@ function pricing(parsed) {
243
242
  )
244
243
  .filter(Boolean)
245
244
  : [],
246
- cta:
247
- first(item.body?.links) ||
248
- first(item.body?.buttons) ||
249
- null,
245
+ cta: first(item.links) || first(item.buttons) || null,
250
246
  highlighted:
251
- item.header?.pretitle?.toLowerCase().includes("popular") ||
252
- false,
247
+ item.pretitle?.toLowerCase().includes("popular") || false,
253
248
  };
254
249
  })
255
250
  .filter((tier) => tier.name);
@@ -263,17 +258,17 @@ function pricing(parsed) {
263
258
  * @returns {Array} Team members
264
259
  */
265
260
  function team(parsed) {
266
- const items = parsed.groups?.items || [];
261
+ const items = parsed?.items || [];
267
262
 
268
263
  return items
269
264
  .map((item) => ({
270
- name: item.header?.title || null,
271
- role: item.header?.subtitle || null,
272
- department: item.header?.pretitle || null,
273
- bio: item.body?.paragraphs || [],
274
- image: first(item.body?.imgs)?.url || null,
275
- imageAlt: first(item.body?.imgs)?.alt || null,
276
- links: item.body?.links || [],
265
+ name: item.title || null,
266
+ role: item.subtitle || null,
267
+ department: item.pretitle || null,
268
+ bio: item.paragraphs || [],
269
+ image: first(item.imgs)?.url || null,
270
+ imageAlt: first(item.imgs)?.alt || null,
271
+ links: item.links || [],
277
272
  }))
278
273
  .filter((member) => member.name);
279
274
  }
@@ -292,14 +287,14 @@ function gallery(parsed, options = {}) {
292
287
  const images = [];
293
288
 
294
289
  if (source === "main" || source === "all") {
295
- const mainImages = parsed.groups?.main?.body?.imgs || [];
290
+ const mainImages = parsed?.imgs || [];
296
291
  images.push(...mainImages);
297
292
  }
298
293
 
299
294
  if (source === "items" || source === "all") {
300
- const items = parsed.groups?.items || [];
295
+ const items = parsed?.items || [];
301
296
  items.forEach((item) => {
302
- const itemImages = item.body?.imgs || [];
297
+ const itemImages = item.imgs || [];
303
298
  images.push(...itemImages);
304
299
  });
305
300
  }
@@ -315,26 +310,24 @@ function gallery(parsed, options = {}) {
315
310
  * Extract content in legacy Article class format
316
311
  * Used for backward compatibility with existing components
317
312
  *
318
- * This extractor transforms the new parser output into the exact format
313
+ * This extractor transforms the new flat parser output into the nested format
319
314
  * used by the legacy Article class, enabling drop-in replacement without
320
315
  * breaking existing components.
321
316
  *
322
- * @param {Object} parsed - Parsed content from parseContent()
323
- * @returns {Object} Legacy format { main, items }
317
+ * @param {Object} parsed - Parsed content from parseContent() (flat structure)
318
+ * @returns {Object} Legacy format { main, items } with nested header/body structure
324
319
  *
325
320
  * @example
326
321
  * const { parseContent, mappers } = require('@uniweb/semantic-parser');
327
- * const parsed = parseContent(doc, { pretitleLevel: 2, parseCodeAsJson: true });
322
+ * const parsed = parseContent(doc);
328
323
  * const legacy = mappers.extractors.legacy(parsed);
329
- * // Returns: { main: {...}, items: [...] }
324
+ * // Returns: { main: { header: {...}, body: {...} }, items: [...] }
330
325
  */
331
326
  function legacy(parsed) {
332
- const groups = parsed.groups || {};
333
-
334
- const transformGroup = (group) => {
335
- if (!group) return null;
327
+ const transformToNested = (content) => {
328
+ if (!content) return null;
336
329
 
337
- let imgs = group.body?.imgs || [];
330
+ let imgs = content.imgs || [];
338
331
  let banner = imgs.filter((item) => {
339
332
  return (item.role = "banner");
340
333
  })?.[0];
@@ -343,41 +336,41 @@ function legacy(parsed) {
343
336
 
344
337
  return {
345
338
  header: {
346
- title: group.header?.title || "",
347
- subtitle: group.header?.subtitle || "",
348
- subtitle2: group.header?.subtitle2 || "",
349
- pretitle: group.header?.pretitle || "",
339
+ title: content.title || "",
340
+ subtitle: content.subtitle || "",
341
+ subtitle2: content.subtitle2 || "",
342
+ pretitle: content.pretitle || "",
350
343
  // Auto-fill description (legacy behavior)
351
344
  description:
352
- group.header?.subtitle2 ||
353
- first(group.body?.paragraphs) ||
345
+ content.subtitle2 ||
346
+ first(content.paragraphs) ||
354
347
  "",
355
- alignment: group.header?.alignment || "",
348
+ alignment: content.alignment || "",
356
349
  },
357
350
  banner,
358
351
  body: {
359
- paragraphs: group.body?.paragraphs || [],
360
- headings: group.body?.headings || [],
352
+ paragraphs: content.paragraphs || [],
353
+ headings: content.headings || [],
361
354
  imgs,
362
- videos: group.body?.videos || [],
363
- lists: group.body?.lists || [],
364
- links: group.body?.links || [],
365
- icons: group.body?.icons || [],
366
- buttons: group.body?.buttons || [],
367
- cards: group.body?.cards || [],
368
- documents: group.body?.documents || [],
369
- forms: group.body?.forms || [],
370
- form: first(group.body?.forms) || null,
371
- quotes: group.body?.quotes || [],
372
- properties: group.body?.properties || {},
373
- propertyBlocks: group.body?.propertyBlocks || [],
355
+ videos: content.videos || [],
356
+ lists: content.lists || [],
357
+ links: content.links || [],
358
+ icons: content.icons || [],
359
+ buttons: content.buttons || [],
360
+ cards: content.cards || [],
361
+ documents: content.documents || [],
362
+ forms: content.forms || [],
363
+ form: first(content.forms) || null,
364
+ quotes: content.quotes || [],
365
+ properties: content.properties || {},
366
+ propertyBlocks: content.propertyBlocks || [],
374
367
  },
375
368
  };
376
369
  };
377
370
 
378
371
  return {
379
- main: transformGroup(groups.main),
380
- items: (groups.items || []).map(transformGroup),
372
+ main: transformToNested(parsed),
373
+ items: (parsed?.items || []).map(transformToNested),
381
374
  };
382
375
  }
383
376
 
@@ -1,42 +1,113 @@
1
/**
 * Flatten a group's nested structure to a flat object.
 *
 * Header fields (title, pretitle, subtitle, subtitle2, alignment) and body
 * collections are copied to the top level, each falling back to an empty
 * default when missing. A group without a `header` or `body` is treated as
 * having an empty one rather than throwing.
 *
 * @param {Object} group Processed group with { header, body, metadata }
 * @returns {Object|null} Flat content object, or null when `group` is falsy
 */
function flattenGroup(group) {
  if (!group) return null;

  // Tolerate partially built groups: a missing section reads as empty.
  const header = group.header ?? {};
  const body = group.body ?? {};

  return {
    title: header.title || '',
    pretitle: header.pretitle || '',
    subtitle: header.subtitle || '',
    subtitle2: header.subtitle2 || '',
    alignment: header.alignment || null,
    paragraphs: body.paragraphs || [],
    links: body.links || [],
    imgs: body.imgs || [],
    icons: body.icons || [],
    lists: body.lists || [],
    videos: body.videos || [],
    buttons: body.buttons || [],
    properties: body.properties || {},
    propertyBlocks: body.propertyBlocks || [],
    cards: body.cards || [],
    documents: body.documents || [],
    forms: body.forms || [],
    quotes: body.quotes || [],
    headings: body.headings || [],
  };
}
30
+
1
31
  /**
2
32
  * Transform a sequence into content groups with semantic structure
3
33
  * @param {Array} sequence Flat sequence of elements
4
34
  * @param {Object} options Parsing options
5
- * @returns {Object} Content organized into groups with identified main content
35
+ * @returns {Object} Flat content object with items array
6
36
  */
7
37
  function processGroups(sequence, options = {}) {
8
- const result = {
9
- main: null,
10
- items: [],
11
- metadata: {
12
- dividerMode: false,
13
- groups: 0,
14
- },
15
- };
16
-
17
- if (!sequence.length) return result;
38
+ // Empty content returns flat empty structure
39
+ if (!sequence.length) {
40
+ return {
41
+ title: '',
42
+ pretitle: '',
43
+ subtitle: '',
44
+ subtitle2: '',
45
+ alignment: null,
46
+ paragraphs: [],
47
+ links: [],
48
+ imgs: [],
49
+ icons: [],
50
+ lists: [],
51
+ videos: [],
52
+ buttons: [],
53
+ properties: {},
54
+ propertyBlocks: [],
55
+ cards: [],
56
+ documents: [],
57
+ forms: [],
58
+ quotes: [],
59
+ headings: [],
60
+ items: [],
61
+ };
62
+ }
18
63
 
19
64
  const groups = splitBySlices(sequence);
20
65
 
21
- // Process each group's structure
66
+ // Process each group's structure (still nested internally)
22
67
  const processedGroups = groups.map((group) => processGroupContent(group));
23
68
 
24
- // Special handling for first group in divider mode
25
- if (result.metadata.dividerMode && groups.startsWithDivider) {
26
- result.items = processedGroups;
69
+ // Determine main vs items
70
+ let mainGroup = null;
71
+ let itemGroups = [];
72
+
73
+ const shouldBeMain = identifyMainContent(processedGroups);
74
+ if (shouldBeMain) {
75
+ mainGroup = processedGroups[0];
76
+ itemGroups = processedGroups.slice(1);
27
77
  } else {
28
- // Organize into main content and items
29
- const shouldBeMain = identifyMainContent(processedGroups);
30
- if (shouldBeMain) {
31
- result.main = processedGroups[0];
32
- result.items = processedGroups.slice(1);
33
- } else {
34
- result.items = processedGroups;
35
- }
78
+ itemGroups = processedGroups;
36
79
  }
37
80
 
38
- // result.metadata.groups = processedGroups.length;
39
- return result;
81
+ // Flatten main content (or return empty flat structure)
82
+ const flatMain = flattenGroup(mainGroup) || {
83
+ title: '',
84
+ pretitle: '',
85
+ subtitle: '',
86
+ subtitle2: '',
87
+ alignment: null,
88
+ paragraphs: [],
89
+ links: [],
90
+ imgs: [],
91
+ icons: [],
92
+ lists: [],
93
+ videos: [],
94
+ buttons: [],
95
+ properties: {},
96
+ propertyBlocks: [],
97
+ cards: [],
98
+ documents: [],
99
+ forms: [],
100
+ quotes: [],
101
+ headings: [],
102
+ };
103
+
104
+ // Flatten items
105
+ const flatItems = itemGroups.map(flattenGroup);
106
+
107
+ return {
108
+ ...flatMain,
109
+ items: flatItems,
110
+ };
40
111
  }
41
112
 
42
113
  function splitBySlices(sequence) {
@@ -1,130 +0,0 @@
1
- /**
2
- * Organize content elements by their type while preserving context
3
- * @param {Array} sequence Flat sequence of elements
4
- * @returns {Object} Content organized by type
5
- */
6
- function processByType(sequence) {
7
- const collections = {
8
- headings: [],
9
- paragraphs: [],
10
- images: {
11
- background: [],
12
- content: [],
13
- gallery: [],
14
- icon: [],
15
- },
16
- lists: [],
17
- dividers: [],
18
- metadata: {
19
- totalElements: sequence.length,
20
- dominantType: null,
21
- hasMedia: false,
22
- },
23
- };
24
-
25
- // Track type frequencies for metadata
26
- const typeFrequency = new Map();
27
-
28
- sequence.forEach((element, index) => {
29
- // Track element type frequency
30
- typeFrequency.set(element.type, (typeFrequency.get(element.type) || 0) + 1);
31
-
32
- // Add context information
33
- const context = getElementContext(sequence, index);
34
- const enrichedElement = { ...element, context };
35
-
36
- // Process element based on type
37
- switch (element.type) {
38
- case "heading":
39
- collections.headings.push(enrichedElement);
40
- break;
41
-
42
- case "paragraph":
43
- collections.paragraphs.push(enrichedElement);
44
- break;
45
-
46
- case "image": {
47
- // Support both attrs.role and top-level role for backwards compatibility
48
- const role = element.attrs?.role || element.role || "content";
49
- if (!collections.images[role]) {
50
- collections.images[role] = [];
51
- }
52
- collections.images[role].push(enrichedElement);
53
- collections.metadata.hasMedia = true;
54
- break;
55
- }
56
-
57
- case "list":
58
- collections.lists.push(enrichedElement);
59
- break;
60
-
61
- case "divider":
62
- collections.dividers.push(enrichedElement);
63
- break;
64
- }
65
- });
66
-
67
- // Calculate dominant type
68
- let maxFrequency = 0;
69
- typeFrequency.forEach((frequency, type) => {
70
- if (frequency > maxFrequency) {
71
- maxFrequency = frequency;
72
- collections.metadata.dominantType = type;
73
- }
74
- });
75
-
76
- // Add helper methods
77
- addCollectionHelpers(collections);
78
-
79
- return collections;
80
- }
81
-
82
- /**
83
- * Get context information for an element
84
- */
85
- function getElementContext(sequence, position) {
86
- const context = {
87
- position,
88
- previousElement: position > 0 ? sequence[position - 1] : null,
89
- nextElement: position < sequence.length - 1 ? sequence[position + 1] : null,
90
- nearestHeading: null,
91
- };
92
-
93
- // Find nearest preceding heading
94
- for (let i = position - 1; i >= 0; i--) {
95
- if (sequence[i].type === "heading") {
96
- context.nearestHeading = sequence[i];
97
- break;
98
- }
99
- }
100
-
101
- return context;
102
- }
103
-
104
- /**
105
- * Add helper methods to collections
106
- */
107
- function addCollectionHelpers(collections) {
108
- // Get headings of specific level
109
- collections.getHeadingsByLevel = function (level) {
110
- return this.headings.filter((h) => h.level === level);
111
- };
112
-
113
- // Get elements by heading context
114
- collections.getElementsByHeadingContext = function (headingFilter) {
115
- const allElements = [
116
- ...this.paragraphs,
117
- ...Object.values(this.images).flat(),
118
- ...this.lists,
119
- ];
120
-
121
- return allElements.filter(
122
- (el) =>
123
- el.context?.nearestHeading && headingFilter(el.context.nearestHeading)
124
- );
125
- };
126
- }
127
-
128
- export {
129
- processByType
130
- };