@uniweb/semantic-parser 1.0.8 → 1.0.10

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,5 +1,9 @@
1
1
  /**
2
2
  * Pre-built extractors for common component patterns
3
+ *
4
+ * All extractors work with the flat content structure:
5
+ * - Root level: title, pretitle, subtitle, paragraphs, links, imgs, items, etc.
6
+ * - Items array: each item has flat structure (title, paragraphs, etc.)
3
7
  */
4
8
 
5
9
  import { first, joinParagraphs } from "./helpers.js";
@@ -12,18 +16,16 @@ import { first, joinParagraphs } from "./helpers.js";
12
16
  * @returns {Object} Hero component data
13
17
  */
14
18
  function hero(parsed) {
15
- const main = parsed.groups?.main;
16
-
17
19
  return {
18
- title: main?.header?.title || null,
19
- subtitle: main?.header?.subtitle || null,
20
- kicker: main?.header?.pretitle || null,
21
- description: main?.body?.paragraphs || [],
22
- image: first(main?.body?.imgs)?.url || null,
23
- imageAlt: first(main?.body?.imgs)?.alt || null,
24
- banner: main?.banner?.url || null,
25
- cta: first(main?.body?.links) || null,
26
- button: first(main?.body?.buttons) || null,
20
+ title: parsed?.title || null,
21
+ subtitle: parsed?.subtitle || null,
22
+ kicker: parsed?.pretitle || null,
23
+ description: parsed?.paragraphs || [],
24
+ image: first(parsed?.imgs)?.url || null,
25
+ imageAlt: first(parsed?.imgs)?.alt || null,
26
+ banner: null, // Banner detection would need to be added separately
27
+ cta: first(parsed?.links) || null,
28
+ button: first(parsed?.buttons) || null,
27
29
  };
28
30
  }
29
31
 
@@ -40,30 +42,30 @@ function hero(parsed) {
40
42
  function card(parsed, options = {}) {
41
43
  const { useItems = false, itemIndex } = options;
42
44
 
43
- const extractCard = (group) => {
44
- if (!group) return null;
45
+ const extractCard = (content) => {
46
+ if (!content) return null;
45
47
 
46
48
  return {
47
- title: group.header?.title || null,
48
- subtitle: group.header?.subtitle || null,
49
- description: group.body?.paragraphs || [],
50
- image: first(group.body?.imgs)?.url || null,
51
- imageAlt: first(group.body?.imgs)?.alt || null,
52
- icon: first(group.body?.icons) || null,
53
- link: first(group.body?.links) || null,
54
- button: first(group.body?.buttons) || null,
49
+ title: content.title || null,
50
+ subtitle: content.subtitle || null,
51
+ description: content.paragraphs || [],
52
+ image: first(content.imgs)?.url || null,
53
+ imageAlt: first(content.imgs)?.alt || null,
54
+ icon: first(content.icons) || null,
55
+ link: first(content.links) || null,
56
+ button: first(content.buttons) || null,
55
57
  };
56
58
  };
57
59
 
58
60
  if (useItems) {
59
- const items = parsed.groups?.items || [];
61
+ const items = parsed?.items || [];
60
62
  if (itemIndex !== undefined) {
61
63
  return extractCard(items[itemIndex]);
62
64
  }
63
65
  return items.map(extractCard).filter(Boolean);
64
66
  }
65
67
 
66
- return extractCard(parsed.groups?.main);
68
+ return extractCard(parsed);
67
69
  }
68
70
 
69
71
  /**
@@ -74,19 +76,17 @@ function card(parsed, options = {}) {
74
76
  * @returns {Object} Article data
75
77
  */
76
78
  function article(parsed) {
77
- const main = parsed.groups?.main;
78
-
79
79
  return {
80
- title: main?.header?.title || null,
81
- subtitle: main?.header?.subtitle || null,
82
- kicker: main?.header?.pretitle || null,
83
- author: main?.metadata?.author || null,
84
- date: main?.metadata?.date || null,
85
- banner: main?.banner?.url || null,
86
- content: main?.body?.paragraphs || [],
87
- images: main?.body?.imgs || [],
88
- videos: main?.body?.videos || [],
89
- links: main?.body?.links || [],
80
+ title: parsed?.title || null,
81
+ subtitle: parsed?.subtitle || null,
82
+ kicker: parsed?.pretitle || null,
83
+ author: null, // Would need metadata support
84
+ date: null, // Would need metadata support
85
+ banner: null, // Banner detection would need to be added separately
86
+ content: parsed?.paragraphs || [],
87
+ images: parsed?.imgs || [],
88
+ videos: parsed?.videos || [],
89
+ links: parsed?.links || [],
90
90
  };
91
91
  }
92
92
 
@@ -98,14 +98,13 @@ function article(parsed) {
98
98
  * @returns {Array} Array of stat objects
99
99
  */
100
100
  function stats(parsed) {
101
- const items = parsed.groups?.items || [];
101
+ const items = parsed?.items || [];
102
102
 
103
103
  return items
104
104
  .map((item) => ({
105
- value: item.header?.title || null,
106
- label:
107
- item.header?.subtitle || first(item.body?.paragraphs) || null,
108
- description: item.body?.paragraphs || [],
105
+ value: item.title || null,
106
+ label: item.subtitle || first(item.paragraphs) || null,
107
+ description: item.paragraphs || [],
109
108
  }))
110
109
  .filter((stat) => stat.value);
111
110
  }
@@ -118,17 +117,17 @@ function stats(parsed) {
118
117
  * @returns {Array} Navigation items
119
118
  */
120
119
  function navigation(parsed) {
121
- const items = parsed.groups?.items || [];
120
+ const items = parsed?.items || [];
122
121
 
123
122
  return items
124
123
  .map((item) => {
125
124
  const navItem = {
126
- label: item.header?.title || null,
127
- href: first(item.body?.links)?.href || null,
125
+ label: item.title || null,
126
+ href: first(item.links)?.href || null,
128
127
  };
129
128
 
130
129
  // Extract children from nested lists
131
- const firstList = first(item.body?.lists);
130
+ const firstList = first(item.lists);
132
131
  if (firstList && firstList.length > 0) {
133
132
  navItem.children = firstList
134
133
  .map((listItem) => ({
@@ -152,16 +151,16 @@ function navigation(parsed) {
152
151
  * @returns {Array} Feature items
153
152
  */
154
153
  function features(parsed) {
155
- const items = parsed.groups?.items || [];
154
+ const items = parsed?.items || [];
156
155
 
157
156
  return items
158
157
  .map((item) => ({
159
- title: item.header?.title || null,
160
- subtitle: item.header?.subtitle || null,
161
- description: item.body?.paragraphs || [],
162
- icon: first(item.body?.icons) || null,
163
- image: first(item.body?.imgs)?.url || null,
164
- link: first(item.body?.links) || null,
158
+ title: item.title || null,
159
+ subtitle: item.subtitle || null,
160
+ description: item.paragraphs || [],
161
+ icon: first(item.icons) || null,
162
+ image: first(item.imgs)?.url || null,
163
+ link: first(item.links) || null,
165
164
  }))
166
165
  .filter((feature) => feature.title);
167
166
  }
@@ -178,25 +177,25 @@ function features(parsed) {
178
177
  function testimonial(parsed, options = {}) {
179
178
  const { useItems = false } = options;
180
179
 
181
- const extractTestimonial = (group) => {
182
- if (!group) return null;
180
+ const extractTestimonial = (content) => {
181
+ if (!content) return null;
183
182
 
184
183
  return {
185
- quote: group.body?.paragraphs || [],
186
- author: group.header?.title || null,
187
- role: group.header?.subtitle || null,
188
- company: group.header?.pretitle || null,
189
- image: first(group.body?.imgs)?.url || null,
190
- imageAlt: first(group.body?.imgs)?.alt || null,
184
+ quote: content.paragraphs || [],
185
+ author: content.title || null,
186
+ role: content.subtitle || null,
187
+ company: content.pretitle || null,
188
+ image: first(content.imgs)?.url || null,
189
+ imageAlt: first(content.imgs)?.alt || null,
191
190
  };
192
191
  };
193
192
 
194
193
  if (useItems) {
195
- const items = parsed.groups?.items || [];
194
+ const items = parsed?.items || [];
196
195
  return items.map(extractTestimonial).filter(Boolean);
197
196
  }
198
197
 
199
- return extractTestimonial(parsed.groups?.main);
198
+ return extractTestimonial(parsed);
200
199
  }
201
200
 
202
201
  /**
@@ -207,13 +206,13 @@ function testimonial(parsed, options = {}) {
207
206
  * @returns {Array} FAQ items
208
207
  */
209
208
  function faq(parsed) {
210
- const items = parsed.groups?.items || [];
209
+ const items = parsed?.items || [];
211
210
 
212
211
  return items
213
212
  .map((item) => ({
214
- question: item.header?.title || null,
215
- answer: item.body?.paragraphs || [],
216
- links: item.body?.links || [],
213
+ question: item.title || null,
214
+ answer: item.paragraphs || [],
215
+ links: item.links || [],
217
216
  }))
218
217
  .filter((item) => item.question);
219
218
  }
@@ -226,16 +225,16 @@ function faq(parsed) {
226
225
  * @returns {Array} Pricing tiers
227
226
  */
228
227
  function pricing(parsed) {
229
- const items = parsed.groups?.items || [];
228
+ const items = parsed?.items || [];
230
229
 
231
230
  return items
232
231
  .map((item) => {
233
- const firstList = first(item.body?.lists);
232
+ const firstList = first(item.lists);
234
233
 
235
234
  return {
236
- name: item.header?.title || null,
237
- price: item.header?.subtitle || null,
238
- description: first(item.body?.paragraphs) || null,
235
+ name: item.title || null,
236
+ price: item.subtitle || null,
237
+ description: first(item.paragraphs) || null,
239
238
  features: firstList
240
239
  ? firstList
241
240
  .map((listItem) =>
@@ -243,13 +242,9 @@ function pricing(parsed) {
243
242
  )
244
243
  .filter(Boolean)
245
244
  : [],
246
- cta:
247
- first(item.body?.links) ||
248
- first(item.body?.buttons) ||
249
- null,
245
+ cta: first(item.links) || first(item.buttons) || null,
250
246
  highlighted:
251
- item.header?.pretitle?.toLowerCase().includes("popular") ||
252
- false,
247
+ item.pretitle?.toLowerCase().includes("popular") || false,
253
248
  };
254
249
  })
255
250
  .filter((tier) => tier.name);
@@ -263,17 +258,17 @@ function pricing(parsed) {
263
258
  * @returns {Array} Team members
264
259
  */
265
260
  function team(parsed) {
266
- const items = parsed.groups?.items || [];
261
+ const items = parsed?.items || [];
267
262
 
268
263
  return items
269
264
  .map((item) => ({
270
- name: item.header?.title || null,
271
- role: item.header?.subtitle || null,
272
- department: item.header?.pretitle || null,
273
- bio: item.body?.paragraphs || [],
274
- image: first(item.body?.imgs)?.url || null,
275
- imageAlt: first(item.body?.imgs)?.alt || null,
276
- links: item.body?.links || [],
265
+ name: item.title || null,
266
+ role: item.subtitle || null,
267
+ department: item.pretitle || null,
268
+ bio: item.paragraphs || [],
269
+ image: first(item.imgs)?.url || null,
270
+ imageAlt: first(item.imgs)?.alt || null,
271
+ links: item.links || [],
277
272
  }))
278
273
  .filter((member) => member.name);
279
274
  }
@@ -292,14 +287,14 @@ function gallery(parsed, options = {}) {
292
287
  const images = [];
293
288
 
294
289
  if (source === "main" || source === "all") {
295
- const mainImages = parsed.groups?.main?.body?.imgs || [];
290
+ const mainImages = parsed?.imgs || [];
296
291
  images.push(...mainImages);
297
292
  }
298
293
 
299
294
  if (source === "items" || source === "all") {
300
- const items = parsed.groups?.items || [];
295
+ const items = parsed?.items || [];
301
296
  items.forEach((item) => {
302
- const itemImages = item.body?.imgs || [];
297
+ const itemImages = item.imgs || [];
303
298
  images.push(...itemImages);
304
299
  });
305
300
  }
@@ -315,26 +310,24 @@ function gallery(parsed, options = {}) {
315
310
  * Extract content in legacy Article class format
316
311
  * Used for backward compatibility with existing components
317
312
  *
318
- * This extractor transforms the new parser output into the exact format
313
+ * This extractor transforms the new flat parser output into the nested format
319
314
  * used by the legacy Article class, enabling drop-in replacement without
320
315
  * breaking existing components.
321
316
  *
322
- * @param {Object} parsed - Parsed content from parseContent()
323
- * @returns {Object} Legacy format { main, items }
317
+ * @param {Object} parsed - Parsed content from parseContent() (flat structure)
318
+ * @returns {Object} Legacy format { main, items } with nested header/body structure
324
319
  *
325
320
  * @example
326
321
  * const { parseContent, mappers } = require('@uniweb/semantic-parser');
327
- * const parsed = parseContent(doc, { pretitleLevel: 2, parseCodeAsJson: true });
322
+ * const parsed = parseContent(doc);
328
323
  * const legacy = mappers.extractors.legacy(parsed);
329
- * // Returns: { main: {...}, items: [...] }
324
+ * // Returns: { main: { header: {...}, body: {...} }, items: [...] }
330
325
  */
331
326
  function legacy(parsed) {
332
- const groups = parsed.groups || {};
333
-
334
- const transformGroup = (group) => {
335
- if (!group) return null;
327
+ const transformToNested = (content) => {
328
+ if (!content) return null;
336
329
 
337
- let imgs = group.body?.imgs || [];
330
+ let imgs = content.imgs || [];
338
331
  let banner = imgs.filter((item) => {
339
332
  return (item.role = "banner");
340
333
  })?.[0];
@@ -343,41 +336,41 @@ function legacy(parsed) {
343
336
 
344
337
  return {
345
338
  header: {
346
- title: group.header?.title || "",
347
- subtitle: group.header?.subtitle || "",
348
- subtitle2: group.header?.subtitle2 || "",
349
- pretitle: group.header?.pretitle || "",
339
+ title: content.title || "",
340
+ subtitle: content.subtitle || "",
341
+ subtitle2: content.subtitle2 || "",
342
+ pretitle: content.pretitle || "",
350
343
  // Auto-fill description (legacy behavior)
351
344
  description:
352
- group.header?.subtitle2 ||
353
- first(group.body?.paragraphs) ||
345
+ content.subtitle2 ||
346
+ first(content.paragraphs) ||
354
347
  "",
355
- alignment: group.header?.alignment || "",
348
+ alignment: content.alignment || "",
356
349
  },
357
350
  banner,
358
351
  body: {
359
- paragraphs: group.body?.paragraphs || [],
360
- headings: group.body?.headings || [],
352
+ paragraphs: content.paragraphs || [],
353
+ headings: content.headings || [],
361
354
  imgs,
362
- videos: group.body?.videos || [],
363
- lists: group.body?.lists || [],
364
- links: group.body?.links || [],
365
- icons: group.body?.icons || [],
366
- buttons: group.body?.buttons || [],
367
- cards: group.body?.cards || [],
368
- documents: group.body?.documents || [],
369
- forms: group.body?.forms || [],
370
- form: first(group.body?.forms) || null,
371
- quotes: group.body?.quotes || [],
372
- properties: group.body?.properties || {},
373
- propertyBlocks: group.body?.propertyBlocks || [],
355
+ videos: content.videos || [],
356
+ lists: content.lists || [],
357
+ links: content.links || [],
358
+ icons: content.icons || [],
359
+ buttons: content.buttons || [],
360
+ cards: content.cards || [],
361
+ documents: content.documents || [],
362
+ forms: content.forms || [],
363
+ form: first(content.forms) || null,
364
+ quotes: content.quotes || [],
365
+ properties: content.properties || {},
366
+ propertyBlocks: content.propertyBlocks || [],
374
367
  },
375
368
  };
376
369
  };
377
370
 
378
371
  return {
379
- main: transformGroup(groups.main),
380
- items: (groups.items || []).map(transformGroup),
372
+ main: transformToNested(parsed),
373
+ items: (parsed?.items || []).map(transformToNested),
381
374
  };
382
375
  }
383
376
 
@@ -1,42 +1,110 @@
1
/**
 * Flatten a group's nested structure to a flat object.
 *
 * Header fields (title, pretitle, subtitle, subtitle2, alignment) and body
 * collections are copied onto a single level. Only `header` and `body` are
 * read; `metadata` is not carried over.
 *
 * @param {Object} group Processed group with { header, body, metadata }
 * @returns {Object|null} Flat content object, or null when `group` is falsy
 */
function flattenGroup(group) {
  if (!group) return null;

  // A group without a header or body yields defaults instead of a TypeError.
  const header = group.header ?? {};
  const body = group.body ?? {};

  return {
    title: header.title || '',
    pretitle: header.pretitle || '',
    subtitle: header.subtitle || '',
    subtitle2: header.subtitle2 || '',
    alignment: header.alignment || null,
    paragraphs: body.paragraphs || [],
    links: body.links || [],
    imgs: body.imgs || [],
    icons: body.icons || [],
    lists: body.lists || [],
    videos: body.videos || [],
    buttons: body.buttons || [],
    data: body.data || {},
    cards: body.cards || [],
    documents: body.documents || [],
    forms: body.forms || [],
    quotes: body.quotes || [],
    headings: body.headings || [],
  };
}
29
+
1
30
  /**
2
31
  * Transform a sequence into content groups with semantic structure
3
32
  * @param {Array} sequence Flat sequence of elements
4
33
  * @param {Object} options Parsing options
5
- * @returns {Object} Content organized into groups with identified main content
34
+ * @returns {Object} Flat content object with items array
6
35
  */
7
36
  function processGroups(sequence, options = {}) {
8
- const result = {
9
- main: null,
10
- items: [],
11
- metadata: {
12
- dividerMode: false,
13
- groups: 0,
14
- },
15
- };
16
-
17
- if (!sequence.length) return result;
37
+ // Empty content returns flat empty structure
38
+ if (!sequence.length) {
39
+ return {
40
+ title: '',
41
+ pretitle: '',
42
+ subtitle: '',
43
+ subtitle2: '',
44
+ alignment: null,
45
+ paragraphs: [],
46
+ links: [],
47
+ imgs: [],
48
+ icons: [],
49
+ lists: [],
50
+ videos: [],
51
+ buttons: [],
52
+ data: {},
53
+ cards: [],
54
+ documents: [],
55
+ forms: [],
56
+ quotes: [],
57
+ headings: [],
58
+ items: [],
59
+ };
60
+ }
18
61
 
19
62
  const groups = splitBySlices(sequence);
20
63
 
21
- // Process each group's structure
64
+ // Process each group's structure (still nested internally)
22
65
  const processedGroups = groups.map((group) => processGroupContent(group));
23
66
 
24
- // Special handling for first group in divider mode
25
- if (result.metadata.dividerMode && groups.startsWithDivider) {
26
- result.items = processedGroups;
67
+ // Determine main vs items
68
+ let mainGroup = null;
69
+ let itemGroups = [];
70
+
71
+ const shouldBeMain = identifyMainContent(processedGroups);
72
+ if (shouldBeMain) {
73
+ mainGroup = processedGroups[0];
74
+ itemGroups = processedGroups.slice(1);
27
75
  } else {
28
- // Organize into main content and items
29
- const shouldBeMain = identifyMainContent(processedGroups);
30
- if (shouldBeMain) {
31
- result.main = processedGroups[0];
32
- result.items = processedGroups.slice(1);
33
- } else {
34
- result.items = processedGroups;
35
- }
76
+ itemGroups = processedGroups;
36
77
  }
37
78
 
38
- // result.metadata.groups = processedGroups.length;
39
- return result;
79
+ // Flatten main content (or return empty flat structure)
80
+ const flatMain = flattenGroup(mainGroup) || {
81
+ title: '',
82
+ pretitle: '',
83
+ subtitle: '',
84
+ subtitle2: '',
85
+ alignment: null,
86
+ paragraphs: [],
87
+ links: [],
88
+ imgs: [],
89
+ icons: [],
90
+ lists: [],
91
+ videos: [],
92
+ buttons: [],
93
+ data: {},
94
+ cards: [],
95
+ documents: [],
96
+ forms: [],
97
+ quotes: [],
98
+ headings: [],
99
+ };
100
+
101
+ // Flatten items
102
+ const flatItems = itemGroups.map(flattenGroup);
103
+
104
+ return {
105
+ ...flatMain,
106
+ items: flatItems,
107
+ };
40
108
  }
41
109
 
42
110
  function splitBySlices(sequence) {
@@ -168,8 +236,7 @@ function processGroupContent(elements) {
168
236
  links: [],
169
237
  lists: [],
170
238
  buttons: [],
171
- properties: {},
172
- propertyBlocks: [],
239
+ data: {},
173
240
  cards: [],
174
241
  documents: [],
175
242
  forms: [],
@@ -274,10 +341,18 @@ function processGroupContent(elements) {
274
341
  body.quotes.push(quoteContent.body);
275
342
  break;
276
343
 
344
+ case "dataBlock":
345
+ // Pre-parsed structured data from content-reader
346
+ body.data[element.tag] = element.data;
347
+ break;
348
+
277
349
  case "codeBlock":
278
- const codeData = element.text;
279
- body.properties = codeData; // Last one
280
- body.propertyBlocks.push(codeData); // All of them
350
+ // Fallback: tagged code blocks where parsing failed at build time
351
+ // Untagged blocks stay in sequence for display
352
+ const tag = element.attrs?.tag;
353
+ if (tag) {
354
+ body.data[tag] = element.text;
355
+ }
281
356
  break;
282
357
 
283
358
  case "form":
@@ -1,3 +1,52 @@
1
+ import { parse as parseYaml } from "yaml";
2
+
3
/**
 * Get code block data - prefers pre-parsed attrs.data, falls back to parsing text
 *
 * Content can come from two sources:
 * 1. Pre-parsed at build time: attrs.data contains parsed JS object
 * 2. Legacy/runtime: text needs to be parsed based on language
 *
 * Untagged blocks are returned as raw text. Parse failures are deliberately
 * swallowed and the raw text is returned instead.
 *
 * @param {string} text - Raw code block text
 * @param {Object} attrs - Code block attributes (language, tag, data)
 * @returns {*} Parsed data or raw text
 */
function getCodeBlockData(text, attrs) {
  const { language, tag, data } = attrs || {};

  // Only process tagged blocks
  if (!tag) {
    return text;
  }

  // Prefer pre-parsed data from build time (attrs.data). `!= null` skips both
  // undefined and null, so an unset attribute falls through to runtime
  // parsing; falsy parsed values such as 0, false or "" are still returned.
  // NOTE(review): assumes a null `data` attribute means "not set" - confirm
  // against the content-reader schema.
  if (data != null) {
    return data;
  }

  // Fallback: parse text at runtime (for backwards compatibility).
  // A non-string language is treated as unknown rather than throwing.
  const lang = typeof language === "string" ? language.trim().toLowerCase() : "";

  if (lang === "json") {
    try {
      return JSON.parse(text);
    } catch {
      return text;
    }
  }

  if (lang === "yaml" || lang === "yml") {
    try {
      return parseYaml(text);
    } catch {
      return text;
    }
  }

  // Unknown language - return raw text
  return text;
}
49
+
1
50
  /**
2
51
  * Process a ProseMirror/TipTap document into a flat sequence
3
52
  * @param {Object} doc ProseMirror document
@@ -79,20 +128,19 @@ function createSequenceElement(node, options = {}) {
79
128
  attrs,
80
129
  };
81
130
 
82
- case "codeBlock":
83
- let textContent = getTextContent(content, options);
84
- let parsed = "";
85
-
86
- //Try pasre json if possible
87
- try {
88
- parsed = JSON.parse(`${textContent}`);
89
- } catch (err) {
90
- parsed = textContent;
91
- }
131
+ case "dataBlock":
132
+ // Pre-parsed structured data from content-reader
133
+ return {
134
+ type: "dataBlock",
135
+ data: attrs.data,
136
+ tag: attrs.tag,
137
+ };
92
138
 
139
+ case "codeBlock":
140
+ const codeText = getTextContent(content, options);
93
141
  return {
94
142
  type: "codeBlock",
95
- text: parsed,
143
+ text: getCodeBlockData(codeText, attrs),
96
144
  attrs,
97
145
  };
98
146