@uniweb/semantic-parser 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,573 @@
1
/**
 * Flatten a ProseMirror/TipTap document into an ordered list of content elements.
 *
 * @param {Object} doc - Document (or any node-like object) whose `content`
 *   children are converted.
 * @param {Object} [options={}] - Parsing options, forwarded untouched to `processNode`.
 * @returns {Array} The elements appended by `processNode`, in document order.
 */
function processSequence(doc, options = {}) {
  const elements = [];

  // processNode fills the array in place; nothing is returned from it.
  processNode(doc, elements, options);

  return elements;
}
13
+
14
/**
 * Convert every direct child of `node` into a sequence element and append the
 * non-empty results to `sequence`.
 *
 * Only the immediate children are visited; nested content is handled by
 * `createSequenceElement` itself for the node types that need it.
 *
 * @param {Object|null|undefined} node - Node whose `content` array is processed.
 *   A missing node, or one without an array `content`, is a no-op.
 * @param {Array} sequence - Output array, mutated in place.
 * @param {Object} options - Parsing options forwarded to `createSequenceElement`.
 * @returns {void}
 */
function processNode(node, sequence, options) {
  const children = node?.content;

  if (!Array.isArray(children)) return;

  children.forEach((child) => {
    const element = createSequenceElement(child, options);

    // createSequenceElement can return null (e.g. a button without text).
    if (element) {
      sequence.push(element);
    }
  });
}
35
+
36
/**
 * Convert a single ProseMirror/TipTap node into a sequence element.
 *
 * Link detection runs first: a paragraph/heading that `isLink` or
 * `isStyledLink` recognises is returned as a link element regardless of its
 * node type. Everything else is dispatched on `node.type`.
 *
 * @param {Object} node - Node with `type` and optional `attrs` / `content`.
 * @param {Object} [options={}] - Parsing options, forwarded to text extraction
 *   and to nested `processSequence` calls.
 * @returns {Object|null} The sequence element, or `null` for a button that has
 *   no text.
 */
function createSequenceElement(node, options = {}) {
  const attrs = node.attrs;
  const content = node.content;

  const linkVal = isLink(node);

  if (linkVal) {
    return {
      type: "link",
      attrs: linkVal, // href, label, children
    };
  }

  const styledLink = isStyledLink(node);

  if (styledLink) return styledLink;

  switch (node.type) {
    case "heading":
      return {
        type: "heading",
        // Optional chaining: a heading without attrs yields an undefined level
        // instead of throwing.
        level: attrs?.level,
        text: getTextContent(content, options),
        children: processInlineElements(content),
        attrs,
      };

    case "paragraph":
      return {
        type: "paragraph",
        text: getTextContent(content, options),
        children: processInlineElements(content),
        attrs,
      };

    case "blockquote":
      return {
        type: "blockquote",
        // Forward options so nested content is parsed like top-level content.
        children: processSequence({ content }, options),
        attrs,
      };

    case "codeBlock": {
      const textContent = getTextContent(content, options);
      let parsed;

      // Code blocks holding valid JSON are exposed as the parsed value (so a
      // block containing `42` yields the number 42); anything else stays a string.
      try {
        parsed = JSON.parse(textContent);
      } catch {
        parsed = textContent;
      }

      return {
        type: "codeBlock",
        text: parsed,
        attrs,
      };
    }

    case "ImageBlock":
      return {
        type: "image",
        attrs: parseImgBlock(attrs),
      };

    case "Video":
      return {
        type: "video",
        attrs: parseVideoBlock(attrs),
      };

    case "bulletList":
    case "orderedList": {
      // An empty list node may carry no `content` at all.
      const listItems = (Array.isArray(content) ? content : [])
        .filter((c) => c.type === "listItem" && c.content)
        .map((c) => c.content);

      return {
        type: "list",
        style: node.type === "bulletList" ? "bullet" : "ordered",
        // One nested sequence per list item.
        children: listItems.map((listItem) =>
          processSequence({ content: listItem }, options)
        ),
        attrs,
      };
    }

    case "DividerBlock":
    case "horizontalRule":
      return {
        type: "divider",
      };

    // Custom TipTap elements
    case "card-group":
      return {
        type: "card-group",
        cards:
          node.content
            ?.filter((c) => c.type === "card" && !c.attrs?.hidden)
            .map((card) => parseCardBlock(card.attrs)) || [],
      };

    case "document-group":
      return {
        type: "document-group",
        documents:
          node.content
            ?.filter((c) => c.type === "document")
            .map((doc) => parseDocumentBlock(doc.attrs)) || [],
      };

    case "FormBlock": {
      // Form data can be a JSON string or an already-parsed object.
      let formData = attrs?.data;

      if (typeof formData === "string") {
        try {
          formData = JSON.parse(formData);
        } catch {
          // Not JSON: keep the raw string.
        }
      }

      return {
        type: "form",
        data: formData,
        attrs,
      };
    }

    case "button": {
      const textContent = getTextContent(content, options);

      // Buttons without a label are dropped from the sequence.
      if (!textContent) return null;

      return {
        type: "button",
        text: textContent,
        children: processInlineElements(content),
        attrs,
      };
    }

    case "UniwebIcon":
      return {
        type: "icon",
        attrs: parseUniwebIcon(attrs),
      };

    case "Icon":
      return {
        type: "icon",
        attrs: parseIconBlock(attrs),
      };

    default:
      // Unknown node types pass through with their text content only.
      return {
        type: node.type,
        content: getTextContent(content, options),
      };
  }
}
201
+
202
// File extensions whose links are emitted with a `download` attribute.
const DOWNLOADABLE_LINK_EXTENSIONS = new Set([
  "pdf",
  "doc",
  "docx",
  "xls",
  "xlsx",
  "ppt",
  "pptx",
  "jpg",
  "jpeg",
  "png",
  "webp",
  "gif",
  "svg",
  "mp4",
  "mp3",
  "wav",
  "mov",
  "zip",
]);

/** Escape a value so it can be placed between HTML tags. */
function escapeHtmlText(value) {
  return String(value)
    .replace(/&/g, "&amp;")
    .replace(/</g, "&lt;")
    .replace(/>/g, "&gt;");
}

/** Escape a value so it can be placed inside a double-quoted HTML attribute. */
function escapeHtmlAttribute(value) {
  return escapeHtmlText(value).replace(/"/g, "&quot;");
}

/** Tell whether `href` names a file with a downloadable extension, ignoring any query string or fragment. */
function hasDownloadableExtension(href) {
  const path = href.split(/[?#]/, 1)[0];
  const fileName = path.slice(path.lastIndexOf("/") + 1);
  const dotIndex = fileName.lastIndexOf(".");

  if (dotIndex === -1) return false;

  return DOWNLOADABLE_LINK_EXTENSIONS.has(
    fileName.slice(dotIndex + 1).toLowerCase()
  );
}

/**
 * Render a node's inline content as an HTML string.
 *
 * Text nodes are HTML-escaped and then wrapped according to their marks, from
 * innermost to outermost: textStyle colour, highlight, bold, italic, link.
 * `hardBreak` nodes become `<br>`; every other inline node type is ignored.
 * Attribute values taken from marks (colour, href, target) are escaped so they
 * cannot break out of the generated markup.
 *
 * @param {Array|null|undefined} content - Inline nodes of a block node.
 * @param {Object} [options={}] - Currently unused; kept for signature compatibility.
 * @returns {string} The HTML string with leading/trailing whitespace trimmed,
 *   or `""` when there is no content.
 */
function getTextContent(content, options = {}) {
  if (!content) return "";

  let html = "";

  for (const node of content) {
    if (!node) continue;

    if (node.type === "hardBreak") {
      html += "<br>";
      continue;
    }

    if (node.type !== "text") continue;

    const marks = Array.isArray(node.marks) ? node.marks : [];
    const findMark = (type) => marks.find((mark) => mark?.type === type);

    let styledText = escapeHtmlText(node.text || "");

    // textStyle (color) — referenced as a CSS custom property.
    const color = findMark("textStyle")?.attrs?.color;
    if (color) {
      styledText = `<span style="color: var(--${escapeHtmlAttribute(
        color
      )})">${styledText}</span>`;
    }

    // highlight
    if (findMark("highlight")) {
      styledText = `<span style="background-color: var(--highlight)">${styledText}</span>`;
    }

    // bold
    if (findMark("bold")) {
      styledText = `<strong>${styledText}</strong>`;
    }

    // italic
    if (findMark("italic")) {
      styledText = `<em>${styledText}</em>`;
    }

    // link (outermost)
    const linkMark = findMark("link");
    if (linkMark) {
      // A link mark without attrs/href renders with an empty href instead of throwing.
      const href = String(linkMark.attrs?.href ?? "");
      const target = linkMark.attrs?.target || "_self";
      const downloadAttr = hasDownloadableExtension(href) ? " download" : "";

      styledText = `<a href="${escapeHtmlAttribute(
        href
      )}" target="${escapeHtmlAttribute(
        target
      )}"${downloadAttr}>${styledText}</a>`;
    }

    html += styledText;
  }

  return html.trim();
}
285
+
286
/**
 * Collect the non-text inline elements (icons and inline math) of a block.
 *
 * @param {Array|null|undefined} content - Inline nodes of a block node.
 * @returns {Array} `{ type: "icon", attrs }` objects for `UniwebIcon` nodes and
 *   the untouched node objects for `math-inline` nodes, in source order.
 *   Empty when `content` is falsy.
 */
function processInlineElements(content) {
  const collected = [];

  if (!content) return collected;

  for (const node of content) {
    switch (node.type) {
      case "UniwebIcon":
        collected.push({ type: "icon", attrs: parseUniwebIcon(node.attrs) });
        break;

      case "math-inline":
        // Math nodes are passed through as-is.
        collected.push(node);
        break;

      default:
        // Text, hard breaks and anything else are not inline elements.
        break;
    }
  }

  return collected;
}
304
+
305
/**
 * Resolve the URL of an asset description.
 *
 * An explicit `src` (or, failing that, `url`) wins. Otherwise, when the asset
 * only carries an `identifier`, it is looked up through the host-provided
 * `uniweb` global.
 *
 * @param {Object|null|undefined} info - Asset info with optional `src`, `url`
 *   and `identifier` fields.
 * @returns {string} The resolved URL, or `""` when nothing can be resolved
 *   (including when the `uniweb` global is not available in this environment).
 */
function makeAssetUrl(info) {
  const directUrl = info?.src || info?.url || "";

  if (directUrl) return directUrl;

  if (!info?.identifier) return "";

  // `uniweb` is not imported by this module; outside the host runtime it does
  // not exist, and dereferencing it would raise a ReferenceError.
  if (typeof uniweb === "undefined") return "";

  return (
    new uniweb.Profile(`docufolio/profile`, "_template").getAssetInfo(
      info.identifier
    )?.src || ""
  );
}
321
+
322
/**
 * Normalise the attributes of a `card` node.
 *
 * - `address` is accepted either as a JSON string or as an already-parsed
 *   object; anything that cannot be parsed becomes `null`.
 * - `icon`, when present, is reduced through `parseUniwebIcon`.
 * - `coverImg` is resolved to a URL string through `makeAssetUrl`.
 * All other attributes are copied through unchanged. The input is not mutated.
 *
 * @param {Object|null|undefined} itemAttrs - Raw card attributes.
 * @returns {Object} The normalised card.
 */
function parseCardBlock(itemAttrs) {
  const { address, ...others } = itemAttrs ?? {};

  let parsedAddress = null;

  if (address && typeof address === "object") {
    // Already parsed: JSON.parse would coerce it to "[object Object]" and fail.
    parsedAddress = address;
  } else if (address) {
    try {
      parsedAddress = JSON.parse(address);
    } catch {
      // Best effort: a malformed address is reported as null.
      parsedAddress = null;
    }
  }

  const card = {
    ...others,
    address: parsedAddress,
    coverImg: makeAssetUrl(others.coverImg ?? null),
  };

  if (others.icon) {
    card.icon = parseUniwebIcon(others.icon);
  }

  return card;
}
345
+
346
/**
 * Normalise the attributes of a `document` node.
 *
 * `coverImg` is resolved to a URL string through `makeAssetUrl`. A document
 * with an explicit `src` gets `href`; otherwise, when `info.identifier` is set,
 * `downloadUrl` is looked up through the host-provided `uniweb` global.
 * `src` and `info` themselves are not copied to the result; every other
 * attribute is.
 *
 * @param {Object|null|undefined} itemAttrs - Raw document attributes.
 * @returns {Object} The normalised document.
 */
function parseDocumentBlock(itemAttrs) {
  const { src, info, coverImg = null, ...others } = itemAttrs ?? {};

  const ele = {
    ...others,
    coverImg: makeAssetUrl(coverImg),
  };

  if (src) {
    ele.href = src;
    return ele;
  }

  // `info` may be missing or explicitly null.
  const identifier = info?.identifier;

  // `uniweb` is not imported by this module; skip the lookup when the host
  // runtime does not provide it rather than raising a ReferenceError.
  if (identifier && typeof uniweb !== "undefined") {
    ele.downloadUrl = new uniweb.Profile(
      `docufolio/profile`,
      "_template"
    ).getAssetInfo(identifier)?.href;
  }

  return ele;
}
369
+
370
/**
 * Pick the icon-related fields out of a node's attributes.
 *
 * @param {Object} itemAttrs - Attributes of a `UniwebIcon` node.
 * @returns {{svg: *, url: *, size: *, color: *, preserveColors: *}} A new
 *   object holding only those five fields (each `undefined` when absent).
 */
function parseUniwebIcon(itemAttrs) {
  return {
    svg: itemAttrs.svg,
    url: itemAttrs.url,
    size: itemAttrs.size,
    color: itemAttrs.color,
    preserveColors: itemAttrs.preserveColors,
  };
}
381
+
382
/**
 * Extract the SVG payload of an `Icon` node.
 *
 * @param {Object} itemAttrs - Attributes of the node.
 * @returns {*} The `svg` attribute, or `undefined` when it is absent.
 */
function parseIconBlock(itemAttrs) {
  return itemAttrs.svg;
}
387
+
388
// Maps an image block's `direction` attribute to the emitted `size` value.
const IMAGE_SIZE_BY_DIRECTION = new Map([
  ["center", "basic"],
  ["wide", "lg"],
  ["fill", "full"],
]);

/**
 * Normalise the attributes of an `ImageBlock` node.
 *
 * The caption is reduced to plain text through `stripTags` and doubles as the
 * alt text when no `alt` is given. When the image info carries an
 * `identifier`, the URL is resolved from that info through `makeAssetUrl`;
 * otherwise the `url` attribute is used as-is.
 *
 * @param {Object|null|undefined} itemAttrs - Raw image attributes. `info` may
 *   be absent, in which case the info-derived fields are `undefined`/empty.
 * @returns {Object} The normalised image attributes.
 */
function parseImgBlock(itemAttrs) {
  const {
    info: imgInfo,
    targetId,
    caption: rawCaption = "",
    direction,
    filter,
    alt = "",
    url: explicitUrl,
    href = "",
    theme,
    role,
    credit = "",
  } = itemAttrs ?? {};

  // Images referenced by plain URL have no `info` object at all.
  const { contentType, viewType, contentId, identifier } = imgInfo ?? {};

  const caption = stripTags(rawCaption);
  const url = identifier ? makeAssetUrl(imgInfo) : explicitUrl;

  return {
    contentType,
    viewType,
    contentId: targetId || contentId,
    url,
    value: identifier || "",
    alt: alt || caption,
    caption,
    direction,
    filter,
    // Only left/right are positions; other directions describe a size.
    imgPos: direction === "left" || direction === "right" ? direction : "",
    size: IMAGE_SIZE_BY_DIRECTION.get(direction) || "basic",
    href,
    theme,
    role,
    credit,
  };
}
435
+
436
/**
 * Normalise the attributes of a `Video` node.
 *
 * The video URL is resolved by `makeAssetUrl` from the `src` attribute merged
 * with the `info` object (fields in `info` take precedence over `src`); the
 * cover image is resolved the same way from `coverImg`.
 *
 * @param {Object} itemAttrs - Raw video attributes.
 * @returns {{src: string, caption: string, direction: *, coverImg: string, alt: *}}
 *   The normalised video attributes.
 */
function parseVideoBlock(itemAttrs) {
  const { src, info = {}, coverImg = {} } = itemAttrs;
  const { caption = "", direction, alt } = itemAttrs;

  const videoSource = { src, ...info };

  return {
    src: makeAssetUrl(videoSource),
    caption,
    direction,
    coverImg: makeAssetUrl(coverImg),
    alt,
  };
}
459
+
460
// Named entities understood by the DOM-less fallback decoder below.
const STRIP_TAGS_NAMED_ENTITIES = new Map([
  ["amp", "&"],
  ["lt", "<"],
  ["gt", ">"],
  ["quot", '"'],
  ["apos", "'"],
  ["nbsp", "\u00a0"],
]);

/**
 * Decode numeric and the most common named HTML entities in a single pass
 * (so `&amp;lt;` becomes `&lt;`, not `<`). Unknown entities are left untouched.
 */
function decodeEntitiesWithoutDom(text) {
  return text.replace(/&(#x[0-9a-f]+|#\d+|[a-z]+);/gi, (match, body) => {
    if (body[0] !== "#") {
      return STRIP_TAGS_NAMED_ENTITIES.get(body.toLowerCase()) ?? match;
    }

    const isHex = body[1] === "x" || body[1] === "X";
    const codePoint = Number.parseInt(
      body.slice(isHex ? 2 : 1),
      isHex ? 16 : 10
    );

    return codePoint > 0 && codePoint <= 0x10ffff
      ? String.fromCodePoint(codePoint)
      : match;
  });
}

/**
 * Reduce an HTML string to plain text: tags are removed, then HTML entities
 * are decoded.
 *
 * Entity decoding uses `DOMParser` when the environment provides one. Where it
 * does not exist (e.g. Node), a small built-in decoder covering numeric and
 * the most common named entities is used instead of throwing.
 *
 * @param {*} htmlString - Value to clean; anything that is not a non-empty
 *   string yields `""`.
 * @returns {string} The plain-text content.
 */
function stripTags(htmlString) {
  if (!htmlString || typeof htmlString !== "string") return "";

  // Remove HTML tags using a regular expression.
  const plainString = htmlString.replace(/<[^>]*>/g, "");

  if (typeof DOMParser === "undefined") {
    return decodeEntitiesWithoutDom(plainString);
  }

  // Let the HTML parser decode the entities.
  return new DOMParser().parseFromString(plainString, "text/html").body
    .textContent;
}
474
+
475
/**
 * Detect a paragraph/heading that consists of exactly one linked item.
 *
 * Icons and whitespace-only text nodes are ignored when counting, so
 * "icon + linked text" still qualifies. Only paragraphs and headings are
 * inspected, as a fast pre-filter.
 *
 * @param {Object|null|undefined} item - Block node to inspect.
 * @returns {{href: *, label: string, children: Array}|false} Link details, or
 *   `false` when the item is not a single-link block. `children` holds the
 *   inline elements (icons, inline math) of the whole block.
 */
function isLink(item) {
  if (!["paragraph", "heading"].includes(item?.type)) return false;

  const allContent = item.content || [];

  // Ignore icons and blank text when deciding whether the block is "just a link".
  const meaningful = allContent.filter((c) => {
    if (c.type === "UniwebIcon") return false;
    if (c.type === "text") return (c.text || "").trim() !== "";

    return true;
  });

  if (meaningful.length !== 1) return false;

  const [contentItem] = meaningful;
  const linkMark = (contentItem?.marks || []).find(
    (mark) => mark?.type === "link"
  );

  if (!linkMark) return false;

  return {
    href: linkMark.attrs?.href,
    label: contentItem.text || "",
    // Built from the unfiltered content: the filtered list has had its icons
    // removed, which would leave `children` permanently without them.
    children: processInlineElements(allContent),
  };
}
513
+
514
/**
 * Detect a paragraph/heading whose content parts all carry a link mark with
 * the same `href` but differ in inline styling (plain, em, strong, ...).
 *
 * When that is the case, the link marks are stripped from the parts, the
 * remaining styling is rendered through `getTextContent`, and the result is
 * wrapped in a single `<a>` element. Icons are ignored for the check.
 * The check fails as soon as one non-icon part has no matching link mark.
 *
 * NOTE(review): the returned element is always typed "paragraph", even when
 * `item` is a heading — confirm this is intended.
 *
 * @param {Object|null|undefined} item - Block node to inspect.
 * @returns {{type: string, children: Array, text: string, attrs: *}|false}
 *   The link paragraph, or `false` when the item does not qualify.
 */
function isStyledLink(item) {
  if (!["paragraph", "heading"].includes(item?.type)) return false;

  const parts = (item.content || []).filter((c) => c.type !== "UniwebIcon");

  if (!parts.length) return false;

  // The first part decides which link every other part has to match.
  const firstLinkMark = parts[0]?.marks?.find(
    (mark) => mark.type === "link" && mark.attrs
  );

  if (!firstLinkMark) return false;

  const { href, target } = firstLinkMark.attrs;

  const allShareLink = parts.every(
    (c) =>
      c?.marks?.some(
        (mark) => mark.type === "link" && mark.attrs?.href === href
      ) || false
  );

  if (!allShareLink) return false;

  // Strip the link marks so only the inline styling is rendered inside the <a>.
  const cleanedContent = parts.map((c) => ({
    ...c,
    marks: c.marks?.filter((mark) => mark.type !== "link") || [],
  }));

  const textContent = getTextContent(cleanedContent);

  if (!textContent) return false;

  // Attribute values come from document data; escape them so they cannot
  // break out of the generated markup.
  const escapeAttr = (value) =>
    String(value ?? "")
      .replace(/&/g, "&amp;")
      .replace(/"/g, "&quot;")
      .replace(/</g, "&lt;")
      .replace(/>/g, "&gt;");

  return {
    type: "paragraph",
    children: processInlineElements(item.content),
    // A link mark without a target falls back to "_self", the same default
    // getTextContent uses, instead of emitting target="undefined".
    text: `<a target="${escapeAttr(target || "_self")}" href="${escapeAttr(
      href
    )}">${textContent}</a>`,
    attrs: item.attrs,
  };
}
572
+
573
+ export { processSequence };