@uniweb/semantic-parser 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,495 @@
1
+ /**
2
+ * Field type definitions for content transformation
3
+ * Handles automatic cleanup and transformation based on component requirements
4
+ */
5
+
6
/**
 * Strip all HTML/markup from text, preserving only text content.
 *
 * Tags are removed with a simple `<...>` pattern, then a small fixed set of
 * entities (&nbsp; &amp; &lt; &gt; &quot; &#39;) is decoded.
 *
 * @param {string} text - Text with potential markup
 * @param {Object} [options] - Stripping options
 * @param {boolean} [options.preserveLineBreaks=false] - Turn <br> into "\n" and keep line breaks
 * @param {boolean} [options.preserveWhitespace=false] - Skip whitespace normalization entirely
 * @returns {string} Plain text ('' when `text` is not a string)
 */
function stripMarkup(text, options = {}) {
  if (typeof text !== 'string') return '';

  const {
    preserveLineBreaks = false,
    preserveWhitespace = false
  } = options;

  // Entities this function knows how to decode, keyed by the text between "&" and ";".
  const entityReplacements = {
    nbsp: ' ',
    amp: '&',
    lt: '<',
    gt: '>',
    quot: '"',
    '#39': "'"
  };

  let result = text;

  // Convert <br> to newlines if preserving line breaks
  if (preserveLineBreaks) {
    result = result.replace(/<br\s*\/?>/gi, '\n');
  }

  // Remove all HTML tags
  result = result.replace(/<[^>]*>/g, '');

  // Decode entities in ONE pass. Chained replaces that handle "&amp;" before
  // the others decode escaped text twice ("&amp;lt;" -> "&lt;" -> "<").
  result = result.replace(
    /&(nbsp|amp|lt|gt|quot|#39);/g,
    (entity, name) => entityReplacements[name]
  );

  if (!preserveWhitespace) {
    if (preserveLineBreaks) {
      // Keep line breaks but collapse runs of whitespace within each line
      result = result
        .split('\n')
        .map(line => line.replace(/\s+/g, ' ').trim())
        .join('\n');
    } else {
      result = result.replace(/\s+/g, ' ').trim();
    }
  }

  return result;
}
49
+
50
/**
 * Truncate text to a maximum length with smart boundary detection.
 *
 * The ellipsis is appended after the cut, so the returned string can be up to
 * `maxLength + ellipsis.length` characters long.
 *
 * @param {string} text - Text to truncate
 * @param {Object} [options] - Truncation options
 * @param {number} [options.maxLength] - Maximum length before truncation; falsy means no limit
 * @param {string} [options.boundary='word'] - 'word', 'sentence' or 'character'
 * @param {string} [options.ellipsis='...'] - Suffix appended to word/character truncations
 * @param {boolean} [options.stripMarkup=false] - Strip markup (via stripMarkup) before measuring
 * @returns {string} Truncated text ('' when `text` is not a string)
 */
function truncateText(text, options = {}) {
  if (typeof text !== 'string') return '';

  const {
    maxLength,
    boundary = 'word', // 'word', 'sentence', 'character'
    ellipsis = '...',
    stripMarkup: strip = false
  } = options;

  // Strip first so the request is honored even when no length limit is set.
  const result = strip ? stripMarkup(text) : text;

  if (!maxLength) return result;

  // Already short enough
  if (result.length <= maxLength) return result;

  // Cut at maxLength, but back off by one UTF-16 unit when the cut would
  // separate a surrogate pair (e.g. an emoji) and leave a lone surrogate.
  let cutIndex = maxLength;
  const lastUnit = result.charCodeAt(cutIndex - 1);
  const nextUnit = result.charCodeAt(cutIndex);
  const splitsPair =
    lastUnit >= 0xd800 && lastUnit <= 0xdbff &&
    nextUnit >= 0xdc00 && nextUnit <= 0xdfff;
  if (splitsPair) {
    cutIndex -= 1;
  }
  const truncated = result.substring(0, cutIndex);

  if (boundary === 'character') {
    return truncated + ellipsis;
  }

  if (boundary === 'sentence') {
    // Last sentence terminator (followed by a space) inside the cut
    const lastSentenceEnd = Math.max(
      truncated.lastIndexOf('. '),
      truncated.lastIndexOf('! '),
      truncated.lastIndexOf('? ')
    );

    // Only accept it when at least half of the allowed length is kept;
    // a complete sentence is returned without an ellipsis.
    if (lastSentenceEnd > maxLength * 0.5) {
      return result.substring(0, lastSentenceEnd + 1);
    }
    // Otherwise fall through to the word-boundary strategy.
  }

  // Word boundary (default): cut at the last space unless that would throw
  // away more than ~30% of the allowed length.
  const lastSpace = truncated.lastIndexOf(' ');
  if (lastSpace > maxLength * 0.7) {
    return result.substring(0, lastSpace) + ellipsis;
  }

  return truncated + ellipsis;
}
103
+
104
/**
 * Sanitize HTML, removing dangerous tags while preserving safe formatting.
 *
 * Processing order:
 *  1. Every tag listed in `stripTags` is removed together with its content.
 *  2. Every remaining tag not listed in `allowedTags` is removed (content kept).
 *  3. Allowed anchors are rebuilt with at most an `href` attribute; hrefs using
 *     a javascript:, vbscript: or data: scheme are dropped.
 *  4. Other allowed tags keep their attributes except `on*` event handlers.
 *
 * NOTE(review): this is a regex-based, best-effort filter. For fully untrusted
 * HTML a parser-based sanitizer is the safer choice.
 *
 * @param {string} html - HTML to sanitize
 * @param {Object} [options] - Sanitization options
 * @param {string[]} [options.allowedTags] - Tag names to keep (default: strong, em, a, br)
 * @param {string[]} [options.stripTags] - Tag names removed with their content
 *   (default: script, style, iframe, object, embed)
 * @returns {string} Sanitized HTML ('' when `html` is not a string)
 */
function sanitizeHtml(html, options = {}) {
  if (typeof html !== 'string') return '';

  const {
    allowedTags = ['strong', 'em', 'a', 'br'],
    stripTags = ['script', 'style', 'iframe', 'object', 'embed']
  } = options;

  // Tag names come from options, so escape them before building a RegExp.
  const escapeForRegex = (source) =>
    String(source).replace(/[.*+?^${}()|[\]\\]/g, '\\$&');

  const codePointToChar = (code) =>
    (code >= 0 && code <= 0x10ffff ? String.fromCodePoint(code) : '');

  // Browsers decode character references and ignore whitespace/control
  // characters inside a URL scheme, so normalize before checking the scheme.
  const hasUnsafeScheme = (url) => {
    const normalized = url
      .replace(/&#x([0-9a-f]+);?/gi, (entity, hex) => codePointToChar(parseInt(hex, 16)))
      .replace(/&#(\d+);?/g, (entity, dec) => codePointToChar(parseInt(dec, 10)))
      .replace(/&colon;/gi, ':')
      .replace(/&(?:tab|newline);/gi, '')
      .replace(/[\u0000-\u0020]/g, '');
    return /^(?:javascript|vbscript|data):/i.test(normalized);
  };

  // Drop on* attributes; quoted values are consumed as a unit so text inside
  // another attribute's value is never mistaken for an attribute.
  const removeEventHandlers = (tag, headLength) => {
    const head = tag.slice(0, headLength);
    const attributes = tag.slice(headLength, -1);
    const cleaned = attributes.replace(
      /\s*([^\s"'<>\/=]+)(?:\s*=\s*(?:"[^"]*"|'[^']*'|[^\s"'>]*))?/g,
      (attribute, attributeName) => (/^on/i.test(attributeName) ? '' : attribute)
    );
    return `${head}${cleaned}>`;
  };

  let result = html;

  // Remove explicitly forbidden tags and their content. The word boundary
  // keeps e.g. "style" from matching <stylesheet>; "\s*" accepts "</script >".
  stripTags.forEach(tag => {
    const name = escapeForRegex(tag);
    const regex = new RegExp(`<${name}\\b[^>]*>[\\s\\S]*?</${name}\\s*>`, 'gi');
    result = result.replace(regex, '');
  });

  const allowed = allowedTags.map(tag => String(tag).toLowerCase());

  // Remove tags not in allowedTags; clean up the ones that stay
  const tagRegex = /<\/?([a-z][a-z0-9]*)\b[^>]*>/gi;
  result = result.replace(tagRegex, (match, tagName) => {
    const name = tagName.toLowerCase();
    if (!allowed.includes(name)) {
      return '';
    }

    const isClosing = match.startsWith('</');

    if (name === 'a') {
      if (isClosing) return '</a>';

      // Require a separator before "href" so "data-href" is not picked up.
      const hrefMatch = match.match(/[\s/]href\s*=\s*(?:"([^"]*)"|'([^']*)')/i);
      let href = '';
      if (hrefMatch) {
        href = hrefMatch[1] !== undefined ? hrefMatch[1] : hrefMatch[2];
      }

      if (!href || hasUnsafeScheme(href)) {
        return '<a>';
      }

      // The value may have been single-quoted; escape double quotes so it
      // cannot break out of the attribute we emit.
      return `<a href="${href.replace(/"/g, '&quot;')}">`;
    }

    return removeEventHandlers(match, (isClosing ? 2 : 1) + tagName.length);
  });

  return result;
}
145
+
146
/**
 * Create an excerpt from text content.
 *
 * Markup is always stripped (via stripMarkup). When `preferFirstSentence` is
 * set and the first sentence is at most 120% of `maxLength`, that sentence is
 * returned as-is; otherwise the text is shortened with truncateText.
 *
 * @param {string|Array} content - Text or array of paragraphs (joined with spaces)
 * @param {Object} [options] - Excerpt options
 * @param {number} [options.maxLength=150] - Target maximum length
 * @param {string} [options.boundary='word'] - Boundary passed to truncateText
 * @param {string} [options.ellipsis='...'] - Ellipsis passed to truncateText
 * @param {boolean} [options.preferFirstSentence=true] - Prefer returning the first sentence
 * @returns {string} Excerpt ('' when the content is neither a string nor an array)
 */
function createExcerpt(content, options = {}) {
  const {
    maxLength = 150,
    boundary = 'word',
    ellipsis = '...',
    preferFirstSentence = true
  } = options;

  // Convert array to string
  let text = Array.isArray(content) ? content.join(' ') : content;
  if (typeof text !== 'string') return '';

  // Always strip markup for excerpts
  text = stripMarkup(text);

  if (preferFirstSentence) {
    // A sentence ends at terminal punctuation followed by whitespace or the
    // end of the text. Requiring that lookahead keeps dots inside tokens
    // ("3.5%", "example.com") from ending the sentence early.
    const firstSentence = text.match(/^[\s\S]*?[.!?]+(?=\s|$)/);
    if (firstSentence && firstSentence[0].length <= maxLength * 1.2) {
      return firstSentence[0].trim();
    }
  }

  return truncateText(text, { maxLength, boundary, ellipsis });
}
177
+
178
/**
 * Type handlers for field transformations.
 *
 * Each handler exposes:
 *  - transform(value, options): converts a raw value into its field representation
 *  - validate(value, rules, context): returns an array of
 *    { field, type, message, severity, autoFix } issue objects
 */
const typeHandlers = {
  /**
   * Plain text - strips all markup
   */
  plaintext: {
    transform: (value, options = {}) => {
      if (value === null || value === undefined) return '';

      let result = stripMarkup(String(value), options);

      if (options.maxLength) {
        result = truncateText(result, {
          maxLength: options.maxLength,
          boundary: options.boundary || 'word',
          // Only fall back when no ellipsis was given, so '' (no ellipsis) works.
          ellipsis: options.ellipsis != null ? options.ellipsis : '...'
        });
      }

      // Optional caller-supplied post-processing step
      if (typeof options.transform === 'function') {
        result = options.transform(result);
      }

      return result;
    },

    validate: (value, rules = {}, context = 'visual-editor') => {
      const errors = [];
      // Same emptiness rule as transform: only null/undefined count as empty,
      // so a value such as 0 is not reported as missing.
      const text = value == null ? '' : String(value);
      const stripped = stripMarkup(text);

      // Only warn in build mode
      if (context === 'build' && /<[^>]*>/.test(text)) {
        errors.push({
          field: rules.fieldName,
          type: 'markup_detected',
          message: 'Field contains HTML markup but expects plain text',
          severity: 'warning',
          autoFix: true
        });
      }

      if (rules.required && !stripped) {
        errors.push({
          field: rules.fieldName,
          type: 'required',
          message: 'Required field is missing',
          severity: 'error',
          autoFix: false
        });
      }

      if (rules.maxLength && stripped.length > rules.maxLength) {
        errors.push({
          field: rules.fieldName,
          type: 'max_length',
          message: `Text is ${stripped.length} characters (max: ${rules.maxLength})`,
          severity: context === 'build' ? 'warning' : 'info',
          autoFix: true
        });
      }

      if (rules.minLength && stripped.length < rules.minLength) {
        errors.push({
          field: rules.fieldName,
          type: 'min_length',
          message: `Text is ${stripped.length} characters (min: ${rules.minLength})`,
          severity: 'warning',
          autoFix: false
        });
      }

      return errors;
    }
  },

  /**
   * Rich text - preserves safe HTML, removes dangerous tags
   */
  richtext: {
    transform: (value, options = {}) => {
      if (value === null || value === undefined) return '';

      // Pass undefined for missing lists so sanitizeHtml applies its own
      // defaults instead of a second, shorter copy kept here.
      let result = sanitizeHtml(String(value), {
        allowedTags: options.allowedTags || undefined,
        stripTags: options.stripTags || undefined
      });

      if (options.maxLength) {
        // NOTE(review): truncateText cuts by character position and is not
        // markup-aware, so a cut can land inside a tag or leave tags unclosed.
        result = truncateText(result, {
          maxLength: options.maxLength,
          boundary: options.boundary || 'word',
          ellipsis: options.ellipsis != null ? options.ellipsis : '...',
          stripMarkup: false
        });
      }

      return result;
    },

    validate: (value, rules = {}, context = 'visual-editor') => {
      const errors = [];
      const text = value == null ? '' : String(value);

      // Markup-only content (no visible text) counts as missing
      if (rules.required && !stripMarkup(text)) {
        errors.push({
          field: rules.fieldName,
          type: 'required',
          message: 'Required field is missing',
          severity: 'error',
          autoFix: false
        });
      }

      return errors;
    }
  },

  /**
   * Excerpt - auto-generates excerpt from content
   */
  excerpt: {
    transform: (value, options = {}) => {
      return createExcerpt(value, {
        maxLength: options.maxLength || 150,
        boundary: options.boundary || 'word',
        ellipsis: options.ellipsis != null ? options.ellipsis : '...',
        preferFirstSentence: options.preferFirstSentence !== false
      });
    },

    validate: () => [] // Excerpts are auto-generated, no validation needed
  },

  /**
   * Number - parses and formats numbers
   */
  number: {
    transform: (value, options = {}) => {
      const num = parseFloat(value);
      if (Number.isNaN(num)) {
        return options.defaultValue !== undefined ? options.defaultValue : 0;
      }

      if (!options.format) {
        return num;
      }

      // Simple number formatting; returns a string
      const { decimals, thousands = ',', decimal = '.' } = options.format;

      const fixed = decimals !== undefined ? num.toFixed(decimals) : String(num);
      const parts = fixed.split('.');

      if (thousands) {
        // Insert the separator between each group of three integer digits
        parts[0] = parts[0].replace(/\B(?=(\d{3})+(?!\d))/g, thousands);
      }

      // Join outside the `thousands` check so a custom decimal separator is
      // applied even when grouping is disabled (thousands: '').
      return parts.join(decimal);
    },

    validate: (value, rules = {}) => {
      const errors = [];
      const num = parseFloat(value);

      if (rules.required && Number.isNaN(num)) {
        errors.push({
          field: rules.fieldName,
          type: 'invalid_number',
          message: 'Value is not a valid number',
          severity: 'error',
          autoFix: false
        });
      }

      return errors;
    }
  },

  /**
   * Image - processes image data
   */
  image: {
    transform: (value, options = {}) => {
      if (!value) {
        return options.defaultValue || null;
      }

      // Handle string (URL) or object (full image data)
      if (typeof value === 'string') {
        return {
          url: value,
          alt: options.defaultAlt || '',
          caption: null
        };
      }

      return {
        url: value.url || value.src,
        alt: value.alt || options.defaultAlt || '',
        caption: value.caption || value.title || null,
        width: value.width,
        height: value.height
      };
    },

    validate: (value, rules = {}) => {
      const errors = [];

      if (rules.required && !value) {
        errors.push({
          field: rules.fieldName,
          type: 'required',
          message: 'Required image is missing',
          severity: 'error',
          autoFix: false
        });
      }

      return errors;
    }
  },

  /**
   * Link - processes link data
   */
  link: {
    transform: (value, options = {}) => {
      if (!value) {
        return options.defaultValue || null;
      }

      // Handle string (URL) or object (full link data)
      if (typeof value === 'string') {
        // External means an absolute http(s) or protocol-relative URL. A bare
        // "http" prefix test would also match relative paths like "http-basics".
        const isExternal = /^(?:https?:)?\/\//i.test(value);
        return {
          href: value,
          label: options.defaultLabel || value,
          target: isExternal ? '_blank' : '_self'
        };
      }

      const href = value.href || value.url;
      return {
        href,
        // Fall back to the resolved href so objects that only provide `url`
        // still get a label.
        label: value.label || value.text || href,
        target: value.target || (value.external ? '_blank' : '_self')
      };
    },

    validate: (value, rules = {}) => {
      const errors = [];

      if (rules.required && !value) {
        errors.push({
          field: rules.fieldName,
          type: 'required',
          message: 'Required link is missing',
          severity: 'error',
          autoFix: false
        });
      }

      return errors;
    }
  }
};
451
+
452
/**
 * Apply type transformation to a value.
 *
 * Unknown types log a warning and return the value unchanged.
 *
 * @param {*} value - Value to transform
 * @param {string} type - Field type (a key of typeHandlers)
 * @param {Object} [options] - Type-specific options
 * @returns {*} Transformed value
 */
function applyType(value, type, options = {}) {
  // Own-property lookup only: a plain `typeHandlers[type]` also resolves
  // inherited keys such as "toString" or "__proto__", which have no
  // transform() and would throw when called.
  const isKnownType = Object.prototype.hasOwnProperty.call(typeHandlers, type);
  const handler = isKnownType ? typeHandlers[type] : undefined;

  if (!handler) {
    console.warn(`Unknown field type: ${type}`);
    return value;
  }

  return handler.transform(value, options);
}
468
+
469
/**
 * Validate value against type and rules.
 *
 * Unknown types produce no issues (an empty array).
 *
 * @param {*} value - Value to validate
 * @param {string} type - Field type (a key of typeHandlers)
 * @param {Object} [rules] - Validation rules
 * @param {string} [context='visual-editor'] - Execution context (visual-editor or build)
 * @returns {Array} Array of validation errors/warnings
 */
function validateType(value, type, rules = {}, context = 'visual-editor') {
  // Own-property lookup only: inherited keys such as "toString" or
  // "__proto__" are not handlers and have no validate() to call.
  if (!Object.prototype.hasOwnProperty.call(typeHandlers, type)) {
    return [];
  }

  const handler = typeHandlers[type];
  if (!handler) {
    return [];
  }

  return handler.validate(value, rules, context);
}
485
+
486
+ export {
487
+ typeHandlers,
488
+ applyType,
489
+ validateType,
490
+ // Export utilities for direct use
491
+ stripMarkup,
492
+ truncateText,
493
+ sanitizeHtml,
494
+ createExcerpt
495
+ };
@@ -0,0 +1,129 @@
1
/**
 * Organize content elements by their type while preserving context.
 *
 * Each collected element is a shallow copy of the original with an added
 * `context` property (from getElementContext). Images are grouped by their
 * `role` (default "content"); roles not predefined get a new group on demand.
 * Elements of other types are only counted towards `metadata.dominantType`.
 *
 * @param {Array} sequence Flat sequence of elements; a non-array is treated as empty
 * @returns {Object} Content organized by type, with helper methods attached
 *   by addCollectionHelpers
 */
function processByType(sequence) {
  const elements = Array.isArray(sequence) ? sequence : [];
  const hasOwn = Object.prototype.hasOwnProperty;

  const collections = {
    headings: [],
    paragraphs: [],
    images: {
      background: [],
      content: [],
      gallery: [],
      icon: [],
    },
    lists: [],
    dividers: [],
    metadata: {
      totalElements: elements.length,
      dominantType: null,
      hasMedia: false,
    },
  };

  // Track type frequencies for metadata
  const typeFrequency = new Map();

  elements.forEach((element, index) => {
    // Track element type frequency
    typeFrequency.set(element.type, (typeFrequency.get(element.type) || 0) + 1);

    // Add context information
    const context = getElementContext(elements, index);
    const enrichedElement = { ...element, context };

    // Process element based on type
    switch (element.type) {
      case "heading":
        collections.headings.push(enrichedElement);
        break;

      case "paragraph":
        collections.paragraphs.push(enrichedElement);
        break;

      case "image": {
        const role = element.role || "content";
        // `role` comes from content data. Check OWN properties only, and
        // create missing groups with defineProperty, so a role such as
        // "constructor" or "__proto__" gets a real array instead of hitting
        // an inherited member.
        if (!hasOwn.call(collections.images, role)) {
          Object.defineProperty(collections.images, role, {
            value: [],
            writable: true,
            enumerable: true,
            configurable: true,
          });
        }
        collections.images[role].push(enrichedElement);
        collections.metadata.hasMedia = true;
        break;
      }

      case "list":
        collections.lists.push(enrichedElement);
        break;

      case "divider":
        collections.dividers.push(enrichedElement);
        break;
    }
  });

  // Calculate dominant type; on ties the type seen first wins
  let maxFrequency = 0;
  typeFrequency.forEach((frequency, type) => {
    if (frequency > maxFrequency) {
      maxFrequency = frequency;
      collections.metadata.dominantType = type;
    }
  });

  // Add helper methods
  addCollectionHelpers(collections);

  return collections;
}
80
+
81
/**
 * Build the context record for the element at `position` in `sequence`.
 *
 * @param {Array} sequence Flat sequence of elements
 * @param {number} position Index of the element being described
 * @returns {Object} `{ position, previousElement, nextElement, nearestHeading }`
 *   where neighbours are null at the ends of the sequence and
 *   `nearestHeading` is the closest earlier element of type "heading" (or null)
 */
function getElementContext(sequence, position) {
  const hasPrevious = position > 0;
  const hasNext = position < sequence.length - 1;

  // Walk backwards from the element just before `position` until a heading turns up
  let nearestHeading = null;
  let cursor = position;
  while (--cursor >= 0) {
    const candidate = sequence[cursor];
    if (candidate.type === "heading") {
      nearestHeading = candidate;
      break;
    }
  }

  return {
    position,
    previousElement: hasPrevious ? sequence[position - 1] : null,
    nextElement: hasNext ? sequence[position + 1] : null,
    nearestHeading,
  };
}
102
+
103
/**
 * Attach query helpers to a collections object (mutates `collections`).
 *
 * Both helpers read from `this`, so they must be called as methods of the
 * collections object.
 *
 * @param {Object} collections Object with `headings`, `paragraphs`, `images`
 *   (an object of arrays) and `lists`
 */
function addCollectionHelpers(collections) {
  // Headings whose `level` strictly equals the requested level
  collections.getHeadingsByLevel = function (level) {
    const isAtLevel = (heading) => heading.level === level;
    return this.headings.filter(isAtLevel);
  };

  // Paragraphs, images (all roles) and lists whose nearest heading passes the filter
  collections.getElementsByHeadingContext = function (headingFilter) {
    const imageGroups = Object.values(this.images);
    const candidates = this.paragraphs.concat(imageGroups.flat(), this.lists);

    return candidates.filter((candidate) => {
      if (!candidate.context?.nearestHeading) {
        return false;
      }
      return headingFilter(candidate.context.nearestHeading);
    });
  };
}
126
+
127
+ export {
128
+ processByType
129
+ };