@uniweb/semantic-parser 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,312 @@
1
+ /**
2
+ * Path-based accessor for extracting values from parsed content
3
+ */
4
+
5
+ import { applyType, validateType } from './types.js';
6
+
7
/**
 * Parse a path string into segments, handling array indices.
 *
 * The path is split on '.'. Each part may end in one or more numeric
 * indices (`imgs[0]`, `grid[1][2]`); every index becomes its own array
 * segment. A part made only of indices (`[0]`) produces array segments
 * without an empty key segment in front of them.
 *
 * @param {string} path - Path string (e.g., 'groups.main.body.imgs[0].url')
 * @returns {Array<Object>} Segments in traversal order, each either
 *   `{ key, type: 'object' }` or `{ index, type: 'array' }`
 */
function parsePath(path) {
  const segments = [];

  for (const part of path.split('.')) {
    // Lazy key followed by a trailing run of one or more [n] suffixes.
    const match = part.match(/^(.*?)((?:\[\d+\])+)$/);

    if (!match) {
      segments.push({ key: part, type: 'object' });
      continue;
    }

    const [, key, indexRun] = match;

    // An index-only part such as "[0]" has no property to look up first.
    if (key !== '') {
      segments.push({ key, type: 'object' });
    }

    for (const [, digits] of indexRun.matchAll(/\[(\d+)\]/g)) {
      segments.push({ index: Number.parseInt(digits, 10), type: 'array' });
    }
  }

  return segments;
}
29
+
30
/**
 * Resolve a dotted/indexed path against parsed content.
 *
 * Traversal stops early with `defaultValue` (or an Error when `required`)
 * as soon as an intermediate value is null/undefined or an index is applied
 * to a non-array. After traversal, an `undefined` result is replaced by
 * `defaultValue`; then `type` (via applyType) and `transform` are applied,
 * in that order.
 *
 * @param {Object} parsed - Parsed content from parseContent()
 * @param {string} path - Path to value (e.g., 'groups.main.header.title')
 * @param {Object} options - Options for extraction; keys not listed below
 *   are forwarded to applyType as type options
 * @param {*} options.defaultValue - Value used when the path resolves to undefined
 * @param {Function} options.transform - Called with the final value; its result is returned
 * @param {boolean} options.required - Throw if the value is undefined, null or ''
 * @param {string} options.type - Field type passed to applyType for non-null values
 * @param {boolean} options.treatEmptyAsDefault - Replace '' with defaultValue
 * @returns {*} Value at path
 * @throws {Error} When `required` is set and the value cannot be resolved
 */
function getByPath(parsed, path, options = {}) {
  const {
    defaultValue,
    transform,
    required = false,
    type,
    treatEmptyAsDefault = false,
    ...typeOptions
  } = options;

  if (!parsed || !path) {
    if (required) throw new Error('Path is required');
    return defaultValue;
  }

  let node = parsed;

  for (const [position, step] of parsePath(path).entries()) {
    if (node === null || node === undefined) {
      if (required) throw new Error(`Required field missing at path: ${path}`);
      return defaultValue;
    }

    if (step.type !== 'array') {
      node = node[step.key];
      continue;
    }

    if (!Array.isArray(node)) {
      if (required) {
        throw new Error(`Expected array at path segment ${position} in: ${path}`);
      }
      return defaultValue;
    }

    node = node[step.index];
  }

  // Only undefined falls back to the default; an explicit null is kept.
  let value = node === undefined ? defaultValue : node;

  if (treatEmptyAsDefault && value === '') {
    value = defaultValue;
  }

  const isMissing = value === undefined || value === null || value === '';
  if (required && isMissing) {
    throw new Error(`Required field missing at path: ${path}`);
  }

  // Type conversion runs first so a custom transform sees the typed value.
  if (type && value !== undefined && value !== null) {
    value = applyType(value, type, typeOptions);
  }

  if (transform) {
    return transform(value);
  }
  return value;
}
104
+
105
/**
 * Extract several values at once, driven by a schema object.
 *
 * Each schema entry is either a path string (shorthand) or a config object
 * `{ path, type, ...fieldOptions }` whose options are forwarded to getByPath.
 * In 'build' mode, typed fields are additionally checked with validateType
 * and the findings are written to console.warn / console.error; the
 * returned object is the same in both modes.
 *
 * @param {Object} parsed - Parsed content from parseContent()
 * @param {Object} schema - Map of output key to path string or field config
 * @param {Object} options - Extraction options
 * @param {string} options.mode - Execution mode ('visual-editor' or 'build')
 * @returns {Object} Extracted values keyed like the schema
 *
 * @example
 * const schema = {
 *   title: { path: 'groups.main.header.title', type: 'plaintext', maxLength: 60 },
 *   image: { path: 'groups.main.body.imgs[0].url', type: 'image', defaultValue: '/placeholder.jpg' },
 *   description: { path: 'groups.main.body.paragraphs', type: 'excerpt', maxLength: 150 }
 * };
 * const data = extractBySchema(parsed, schema, { mode: 'visual-editor' });
 */
function extractBySchema(parsed, schema, options = {}) {
  const { mode = 'visual-editor' } = options;
  const isBuild = mode === 'build';
  const extracted = {};
  const findings = [];

  Object.entries(schema).forEach(([field, spec]) => {
    // Shorthand entry: the spec itself is the path.
    if (typeof spec === 'string') {
      extracted[field] = getByPath(parsed, spec);
      return;
    }

    const { path, type, ...fieldOptions } = spec;
    extracted[field] = getByPath(parsed, path, { type, ...fieldOptions });

    if (!type || !isBuild) {
      return;
    }

    // Validation looks at the untyped value (default applied, no transform).
    const rawValue = getByPath(parsed, path, {
      defaultValue: fieldOptions.defaultValue
    });
    const issues = validateType(
      rawValue,
      type,
      { ...fieldOptions, fieldName: field },
      mode
    );
    findings.push(...issues);
  });

  if (isBuild && findings.length > 0) {
    const withSeverity = (level) => findings.filter((f) => f.severity === level);
    const errors = withSeverity('error');
    const warnings = withSeverity('warning');

    if (warnings.length > 0) {
      console.warn('Content validation warnings:');
      for (const w of warnings) {
        console.warn(` [${w.field}] ${w.message}${w.autoFix ? ' (auto-fixed)' : ''}`);
      }
    }

    if (errors.length > 0) {
      console.error('Content validation errors:');
      for (const e of errors) {
        console.error(` [${e.field}] ${e.message}`);
      }
    }
  }

  return extracted;
}
185
+
186
/**
 * Report whether a path resolves to an actual value.
 *
 * @param {Object} parsed - Parsed content
 * @param {string} path - Path to check
 * @returns {boolean} True when getByPath yields something other than
 *   null/undefined; false otherwise, including when the lookup throws
 */
function hasPath(parsed, path) {
  let found;

  try {
    found = getByPath(parsed, path);
  } catch {
    // A lookup that blows up is treated the same as a missing value.
    return false;
  }

  return found != null;
}
200
+
201
/**
 * Try several paths in order and return the first value that exists.
 *
 * A path "exists" when getByPath yields something other than
 * null/undefined. Each path is resolved exactly once; a lookup that throws
 * is skipped, matching the error-swallowing behaviour of hasPath.
 *
 * @param {Object} parsed - Parsed content
 * @param {Array<string>} paths - Paths to try, in priority order; a
 *   null/undefined list is treated as empty
 * @param {*} defaultValue - Returned when no path yields a value
 * @returns {*} First existing value or default
 */
function getFirstExisting(parsed, paths, defaultValue = null) {
  for (const path of paths ?? []) {
    let value;

    try {
      value = getByPath(parsed, path);
    } catch {
      // Same outcome as hasPath() returning false for this path.
      continue;
    }

    if (value !== null && value !== undefined) {
      return value;
    }
  }

  return defaultValue;
}
216
+
217
/**
 * Extract values from every element of an array using the same item config.
 *
 * Each element is wrapped as `{ item }` and item paths are prefixed with
 * `item.` so they can be resolved by getByPath / extractBySchema. The
 * prefixed schema does not depend on the element, so it is built once and
 * reused for all elements.
 *
 * @param {Object} parsed - Parsed content
 * @param {string} arrayPath - Path to array
 * @param {string|Object} itemConfig - Path string, or a schema object whose
 *   entries are path strings or `{ path, ... }` configs relative to the item
 * @returns {Array} Extracted values; empty when the path is not an array
 *
 * @example
 * // Get all item titles
 * mapArray(parsed, 'groups.items', 'header.title')
 *
 * // Get objects from each item
 * mapArray(parsed, 'groups.items', {
 *   title: 'header.title',
 *   text: { path: 'body.paragraphs', transform: p => p.join(' ') }
 * })
 */
function mapArray(parsed, arrayPath, itemConfig) {
  const array = getByPath(parsed, arrayPath, { defaultValue: [] });

  // Nothing to map: return before itemConfig is inspected at all.
  if (!Array.isArray(array) || array.length === 0) {
    return [];
  }

  if (typeof itemConfig === 'string') {
    const itemPath = `item.${itemConfig}`;
    return array.map((item) => getByPath({ item }, itemPath));
  }

  // Loop-invariant: rewrite every path relative to the `{ item }` wrapper.
  const itemSchema = Object.fromEntries(
    Object.entries(itemConfig).map(([key, config]) => [
      key,
      typeof config === 'string'
        ? `item.${config}`
        : { ...config, path: `item.${config.path}` }
    ])
  );

  return array.map((item) => extractBySchema({ item }, itemSchema));
}
258
+
259
/**
 * Run type validation for a schema without extracting any values.
 * Useful for providing UI hints in the visual editor.
 *
 * Shorthand (string) entries and entries without a `type` are skipped.
 * For the rest, the raw value (with `defaultValue` applied) is handed to
 * validateType, and only fields with at least one finding appear in the
 * result.
 *
 * @param {Object} parsed - Parsed content
 * @param {Object} schema - Schema to validate against
 * @param {Object} options - Validation options
 * @param {string} options.mode - Execution mode ('visual-editor' or 'build')
 * @returns {Object} Validation findings keyed by field name
 *
 * @example
 * const hints = validateSchema(parsed, schema);
 * // {
 * //   title: [{ type: 'max_length', severity: 'info', message: '...' }],
 * //   image: [{ type: 'required', severity: 'error', message: '...' }]
 * // }
 */
function validateSchema(parsed, schema, options = {}) {
  const { mode = 'visual-editor' } = options;

  return Object.entries(schema).reduce((report, [field, spec]) => {
    // Shorthand paths carry no type information to validate against.
    if (typeof spec === 'string') {
      return report;
    }

    const { path, type, ...fieldOptions } = spec;
    if (!type) {
      return report;
    }

    const rawValue = getByPath(parsed, path, {
      defaultValue: fieldOptions.defaultValue
    });
    const issues = validateType(
      rawValue,
      type,
      { ...fieldOptions, fieldName: field },
      mode
    );

    if (issues.length > 0) {
      report[field] = issues;
    }
    return report;
  }, {});
}
304
+
305
+ export {
306
+ getByPath,
307
+ extractBySchema,
308
+ validateSchema,
309
+ hasPath,
310
+ getFirstExisting,
311
+ mapArray
312
+ };
@@ -0,0 +1,397 @@
1
+ /**
2
+ * Pre-built extractors for common component patterns
3
+ */
4
+
5
+ import { first, joinParagraphs } from "./helpers.js";
6
+
7
/**
 * Build hero component data from the main group.
 * Common pattern: large header with title, subtitle, image, and CTA.
 *
 * Scalar fields fall back to null and `description` to an empty array
 * when the corresponding content is missing or falsy.
 *
 * @param {Object} parsed - Parsed content from parseContent()
 * @returns {Object} Hero data: title, subtitle, kicker, description,
 *   image, imageAlt, banner, cta, button
 */
function hero(parsed) {
    const { header, body, banner } = parsed.groups?.main ?? {};
    const leadImage = first(body?.imgs);

    return {
        title: header?.title || null,
        subtitle: header?.subtitle || null,
        kicker: header?.pretitle || null,
        description: body?.paragraphs || [],
        image: leadImage?.url || null,
        imageAlt: leadImage?.alt || null,
        banner: banner?.url || null,
        cta: first(body?.links) || null,
        button: first(body?.buttons) || null,
    };
}
29
+
30
/**
 * Build card component data.
 * Common pattern: title, description, image, and link.
 *
 * By default the main group is used. With `useItems`, either a single
 * item (when `itemIndex` is given) or every non-empty item is converted.
 *
 * @param {Object} parsed - Parsed content from parseContent()
 * @param {Object} options - Extraction options
 * @param {boolean} options.useItems - Extract from items instead of main
 * @param {number} options.itemIndex - Specific item index to extract from
 * @returns {Object|Array|null} Card data, an array of cards when
 *   useItems=true without itemIndex, or null when the group is missing
 */
function card(parsed, options = {}) {
    const { useItems = false, itemIndex } = options;

    function toCard(group) {
        if (!group) {
            return null;
        }

        const { header, body } = group;
        const leadImage = first(body?.imgs);

        return {
            title: header?.title || null,
            subtitle: header?.subtitle || null,
            description: body?.paragraphs || [],
            image: leadImage?.url || null,
            imageAlt: leadImage?.alt || null,
            icon: first(body?.icons) || null,
            link: first(body?.links) || null,
            button: first(body?.buttons) || null,
        };
    }

    if (!useItems) {
        return toCard(parsed.groups?.main);
    }

    const items = parsed.groups?.items || [];

    return itemIndex === undefined
        ? items.map((item) => toCard(item)).filter(Boolean)
        : toCard(items[itemIndex]);
}
68
+
69
/**
 * Build article/blog data from the main group.
 * Common pattern: title, author info, content blocks, images.
 *
 * Scalar fields fall back to null and list fields to an empty array when
 * the corresponding content is missing or falsy.
 *
 * @param {Object} parsed - Parsed content from parseContent()
 * @returns {Object} Article data: title, subtitle, kicker, author, date,
 *   banner, content, images, videos, links
 */
function article(parsed) {
    const { header, metadata, banner, body } = parsed.groups?.main ?? {};

    return {
        title: header?.title || null,
        subtitle: header?.subtitle || null,
        kicker: header?.pretitle || null,
        author: metadata?.author || null,
        date: metadata?.date || null,
        banner: banner?.url || null,
        content: body?.paragraphs || [],
        images: body?.imgs || [],
        videos: body?.videos || [],
        links: body?.links || [],
    };
}
92
+
93
/**
 * Build statistics/metrics data from the item groups.
 * Common pattern: numeric value with label.
 *
 * The item title is the value; the label is the subtitle or, failing
 * that, the first paragraph. Items without a value are dropped.
 *
 * @param {Object} parsed - Parsed content from parseContent()
 * @returns {Array} Stat objects `{ value, label, description }`
 */
function stats(parsed) {
    const items = parsed.groups?.items || [];

    return items.flatMap((item) => {
        const stat = {
            value: item.header?.title || null,
            label:
                item.header?.subtitle || first(item.body?.paragraphs) || null,
            description: item.body?.paragraphs || [],
        };

        return stat.value ? [stat] : [];
    });
}
112
+
113
/**
 * Build a navigation menu structure from the item groups.
 * Common pattern: hierarchical menu with labels, links, and optional children.
 *
 * Each item contributes `{ label, href }`; when its first list is
 * non-empty, the list entries become `children` (entries without a label
 * are dropped). Items without a label are dropped.
 *
 * @param {Object} parsed - Parsed content from parseContent()
 * @returns {Array} Navigation items
 */
function navigation(parsed) {
    const toChild = (listItem) => ({
        label: joinParagraphs(listItem.paragraphs) || null,
        href: first(listItem.links)?.href || null,
        icon: first(listItem.icons) || null,
    });

    const menu = [];

    (parsed.groups?.items || []).forEach((item) => {
        const entry = {
            label: item.header?.title || null,
            href: first(item.body?.links)?.href || null,
        };

        // Nested entries come from the first list in the item body, if any.
        const submenu = first(item.body?.lists);
        if (submenu && submenu.length > 0) {
            entry.children = submenu.map(toChild).filter((child) => child.label);
        }

        if (entry.label) {
            menu.push(entry);
        }
    });

    return menu;
}
146
+
147
/**
 * Build a feature list from the item groups.
 * Common pattern: icon/image, title, description.
 *
 * Items without a title are dropped.
 *
 * @param {Object} parsed - Parsed content from parseContent()
 * @returns {Array} Feature items `{ title, subtitle, description, icon, image, link }`
 */
function features(parsed) {
    const collected = [];

    (parsed.groups?.items || []).forEach((item) => {
        const { header, body } = item;
        const feature = {
            title: header?.title || null,
            subtitle: header?.subtitle || null,
            description: body?.paragraphs || [],
            icon: first(body?.icons) || null,
            image: first(body?.imgs)?.url || null,
            link: first(body?.links) || null,
        };

        if (feature.title) {
            collected.push(feature);
        }
    });

    return collected;
}
168
+
169
/**
 * Build testimonial data.
 * Common pattern: quote, author name, role, image.
 *
 * By default the main group is used; with `useItems`, every non-empty
 * item is converted instead.
 *
 * @param {Object} parsed - Parsed content from parseContent()
 * @param {Object} options - Extraction options
 * @param {boolean} options.useItems - Extract from items instead of main
 * @returns {Object|Array|null} Testimonial data, an array when
 *   useItems=true, or null when the main group is missing
 */
function testimonial(parsed, options = {}) {
    const { useItems = false } = options;

    function toTestimonial(group) {
        if (!group) {
            return null;
        }

        const { header, body } = group;
        const portrait = first(body?.imgs);

        return {
            quote: body?.paragraphs || [],
            author: header?.title || null,
            role: header?.subtitle || null,
            company: header?.pretitle || null,
            image: portrait?.url || null,
            imageAlt: portrait?.alt || null,
        };
    }

    if (!useItems) {
        return toTestimonial(parsed.groups?.main);
    }

    return (parsed.groups?.items || [])
        .map((item) => toTestimonial(item))
        .filter(Boolean);
}
201
+
202
/**
 * Build FAQ entries (question and answer pairs) from the item groups.
 * Common pattern: question as title, answer as content.
 *
 * Items without a question are dropped.
 *
 * @param {Object} parsed - Parsed content from parseContent()
 * @returns {Array} FAQ items `{ question, answer, links }`
 */
function faq(parsed) {
    const items = parsed.groups?.items || [];

    return items.reduce((entries, item) => {
        const entry = {
            question: item.header?.title || null,
            answer: item.body?.paragraphs || [],
            links: item.body?.links || [],
        };

        if (entry.question) {
            entries.push(entry);
        }
        return entries;
    }, []);
}
220
+
221
/**
 * Build pricing tier data from the item groups.
 * Common pattern: plan name, price, features list, CTA.
 *
 * Features are the joined paragraphs of each entry in the item's first
 * list (empty results dropped). A tier is flagged `highlighted` when its
 * pretitle contains "popular" (case-insensitive). Tiers without a name
 * are dropped.
 *
 * @param {Object} parsed - Parsed content from parseContent()
 * @returns {Array} Pricing tiers `{ name, price, description, features, cta, highlighted }`
 */
function pricing(parsed) {
    const toTier = (item) => {
        const { header, body } = item;
        const featureList = first(body?.lists);

        let features = [];
        if (featureList) {
            features = featureList
                .map((listItem) => joinParagraphs(listItem.paragraphs))
                .filter(Boolean);
        }

        return {
            name: header?.title || null,
            price: header?.subtitle || null,
            description: first(body?.paragraphs) || null,
            features,
            cta: first(body?.links) || first(body?.buttons) || null,
            highlighted:
                header?.pretitle?.toLowerCase().includes("popular") || false,
        };
    };

    const tiers = (parsed.groups?.items || []).map((item) => toTier(item));

    return tiers.filter((tier) => tier.name);
}
257
+
258
/**
 * Build team member data from the item groups.
 * Common pattern: name, role, bio, image, social links.
 *
 * Items without a name are dropped.
 *
 * @param {Object} parsed - Parsed content from parseContent()
 * @returns {Array} Team members `{ name, role, department, bio, image, imageAlt, links }`
 */
function team(parsed) {
    const items = parsed.groups?.items || [];

    return items.flatMap((item) => {
        const { header, body } = item;
        const portrait = first(body?.imgs);
        const member = {
            name: header?.title || null,
            role: header?.subtitle || null,
            department: header?.pretitle || null,
            bio: body?.paragraphs || [],
            image: portrait?.url || null,
            imageAlt: portrait?.alt || null,
            links: body?.links || [],
        };

        return member.name ? [member] : [];
    });
}
280
+
281
/**
 * Collect gallery images.
 * Common pattern: collection of images with captions.
 *
 * Main-group images come first, followed by each item's images in item
 * order. Any `source` other than 'main', 'items' or 'all' yields an empty
 * list.
 *
 * @param {Object} parsed - Parsed content from parseContent()
 * @param {Object} options - Extraction options
 * @param {string} options.source - Source to extract from: 'main', 'items', 'all'
 * @returns {Array} Gallery images `{ url, alt, caption }`
 */
function gallery(parsed, options = {}) {
    const { source = "all" } = options;
    const wants = (name) => source === name || source === "all";
    const collected = [];

    if (wants("main")) {
        collected.push(...(parsed.groups?.main?.body?.imgs || []));
    }

    if (wants("items")) {
        (parsed.groups?.items || []).forEach((item) => {
            collected.push(...(item.body?.imgs || []));
        });
    }

    return collected.map(({ url, alt, caption }) => ({
        url,
        alt: alt || null,
        caption: caption || null,
    }));
}
313
+
314
/**
 * Extract content in legacy Article class format
 * Used for backward compatibility with existing components
 *
 * This extractor transforms the new parser output into the exact format
 * used by the legacy Article class, enabling drop-in replacement without
 * breaking existing components.
 *
 * The banner of a group is the first image whose `role` is "banner",
 * falling back to the first image. Source images are not modified.
 * Missing header fields become empty strings and missing body
 * collections become empty arrays/objects; a missing group becomes null.
 *
 * @param {Object} parsed - Parsed content from parseContent()
 * @returns {Object} Legacy format { main, items }
 *
 * @example
 * const { parseContent, mappers } = require('@uniwebcms/semantic-parser');
 * const parsed = parseContent(doc, { pretitleLevel: 2, parseCodeAsJson: true });
 * const legacy = mappers.extractors.legacy(parsed);
 * // Returns: { main: {...}, items: [...] }
 */
function legacy(parsed) {
    const groups = parsed.groups || {};

    const transformGroup = (group) => {
        if (!group) return null;

        const imgs = group.body?.imgs || [];

        // Compare (===) rather than assign: the previous `item.role = "banner"`
        // overwrote every image's role and always selected the first image.
        const banner =
            imgs.find((img) => img?.role === "banner") || imgs[0];

        return {
            header: {
                title: group.header?.title || "",
                subtitle: group.header?.subtitle || "",
                subtitle2: group.header?.subtitle2 || "",
                pretitle: group.header?.pretitle || "",
                // Auto-fill description (legacy behavior)
                description:
                    group.header?.subtitle2 ||
                    first(group.body?.paragraphs) ||
                    "",
                alignment: group.header?.alignment || "",
            },
            banner,
            body: {
                paragraphs: group.body?.paragraphs || [],
                headings: group.body?.headings || [],
                imgs,
                videos: group.body?.videos || [],
                lists: group.body?.lists || [],
                links: group.body?.links || [],
                icons: group.body?.icons || [],
                buttons: group.body?.buttons || [],
                cards: group.body?.cards || [],
                documents: group.body?.documents || [],
                forms: group.body?.forms || [],
                form: first(group.body?.forms) || null,
                quotes: group.body?.quotes || [],
                properties: group.body?.properties || {},
                propertyBlocks: group.body?.propertyBlocks || [],
            },
        };
    };

    return {
        main: transformGroup(groups.main),
        items: (groups.items || []).map(transformGroup),
    };
}
383
+
384
+ export {
385
+ hero,
386
+ card,
387
+ article,
388
+ stats,
389
+ navigation,
390
+ features,
391
+ testimonial,
392
+ faq,
393
+ pricing,
394
+ team,
395
+ gallery,
396
+ legacy,
397
+ };