@uniweb/semantic-parser 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,402 @@
/**
 * Flatten a ProseMirror/TipTap document into a linear list of content
 * elements.
 *
 * The actual traversal is delegated to processNode, which appends the
 * elements it produces to the array created here.
 *
 * @param {Object} doc ProseMirror document (normally the root `doc` node)
 * @param {Object} [options={}] Parsing options, forwarded unchanged
 * @returns {Array} Sequence of content elements; empty when `doc` is
 *   null or undefined
 */
function processSequence(doc, options = {}) {
  const sequence = [];

  // A missing document yields an empty sequence rather than a TypeError
  // from dereferencing `doc.type` during traversal.
  if (doc == null) {
    return sequence;
  }

  processNode(doc, sequence, options);

  return sequence;
}
13
+
/**
 * Append the sequence element for `node` to `sequence`.
 *
 * The root `doc` node produces no element of its own; each of its direct
 * children is processed in order instead. Any other node is converted by
 * createSequenceElement and pushed only when the result is truthy.
 *
 * @param {Object} node ProseMirror node
 * @param {Array} sequence Output array, mutated in place
 * @param {Object} options Parsing options, forwarded unchanged
 * @returns {void}
 */
function processNode(node, sequence, options) {
  const isRoot = node.type === "doc";

  if (!isRoot) {
    const built = createSequenceElement(node, options);
    if (built) {
      sequence.push(built);
    }
    return;
  }

  // Root document: descend into the children (if there are any).
  if (node.content != null) {
    node.content.forEach((child) => {
      processNode(child, sequence, options);
    });
  }
}
28
+
/**
 * Convert a single ProseMirror/TipTap node into a sequence element.
 *
 * Paragraphs are inspected first so that "pure" paragraphs are lifted into
 * a dedicated element type:
 *   - several text parts that all share one link href -> "styledLink"
 *   - exactly one child carrying a link mark          -> "link"
 *   - exactly one image child, by `attrs.role`        -> "image" (roles
 *     "image"/"banner"), "icon" or "video"
 *   - exactly one text child carrying a button mark   -> "button"
 * Everything else is dispatched on `node.type`.
 *
 * Empty paragraphs (no `content` array) and nodes/marks without `attrs`
 * are tolerated; they fall through to the generic handling instead of
 * throwing.
 *
 * @param {Object} node ProseMirror node
 * @param {Object} [options={}] Parsing options
 * @param {boolean} [options.parseCodeAsJson] When truthy, code block text
 *   is additionally run through JSON.parse and exposed as `parsed`
 * @returns {Object|null} Sequence element, or null for bare `text` nodes
 */
function createSequenceElement(node, options = {}) {
  const isParagraph = node.type === "paragraph";
  const isInlineMedia = (child) =>
    child.type === "UniwebIcon" || child.type === "image";

  // Returns the shared link mark when a multi-part paragraph consists only
  // of parts (icons/images aside) linking to the same href, else false.
  function findStyledLinkMark() {
    if (!isParagraph || !node.content || !(node.content.length > 1)) {
      return false;
    }

    const parts = node.content.filter((child) => !isInlineMedia(child));
    if (parts.length === 0) return false;

    const firstLinkMark = parts[0]?.marks?.find(
      (mark) => mark.type === "link" && mark.attrs
    );
    if (!firstLinkMark) return false;

    const allShareHref = parts.every((child) =>
      child?.marks?.some(
        (mark) =>
          mark.type === "link" &&
          mark.attrs?.href === firstLinkMark.attrs.href
      )
    );

    return allShareHref ? firstLinkMark : false;
  }

  // Styled link is checked first (multi-part link).
  const styledLinkMark = findStyledLinkMark();
  if (styledLinkMark) {
    // Drop the link marks but keep every other mark on each part.
    const cleanedContent = node.content
      .filter((child) => !isInlineMedia(child))
      .map((child) => ({
        ...child,
        marks: child.marks?.filter((mark) => mark.type !== "link") || [],
      }));

    return {
      type: "styledLink",
      href: styledLinkMark.attrs.href,
      target: styledLinkMark.attrs.target || "_self",
      content: getTextContent({ ...node, content: cleanedContent }, options),
    };
  }

  // Single-child paragraphs. An empty paragraph has no `content` array at
  // all, hence the optional chaining on the length check.
  const onlyChild =
    isParagraph && node.content?.length === 1 ? node.content[0] : null;

  if (onlyChild) {
    // Simple single-part link (takes precedence over image/button checks).
    const linkMark = onlyChild.marks?.find((mark) => mark.type === "link");
    if (linkMark) {
      return {
        type: "link",
        content: {
          href: linkMark.attrs?.href,
          label: onlyChild.text,
        },
      };
    }

    if (onlyChild.type === "image") {
      const attrs = onlyChild.attrs ?? {};

      switch (attrs.role) {
        case "image":
        case "banner":
          return {
            type: "image",
            src: attrs.src,
            caption: attrs.title,
            alt: attrs.alt || attrs.title,
            role: attrs.role,
          };

        case "icon":
          return {
            type: "icon",
            svg: attrs.svg,
          };

        case "video":
          return {
            type: "video",
            src: attrs.src,
            caption: attrs.title,
            alt: attrs.alt || attrs.title,
          };

        default:
          // Any other role falls through to the generic paragraph handling.
          break;
      }
    }

    if (onlyChild.type === "text") {
      const buttonMark = onlyChild.marks?.find(
        (mark) => mark.type === "button"
      );
      if (buttonMark) {
        return {
          type: "button",
          content: onlyChild.text,
          attrs: buttonMark.attrs,
        };
      }
    }
  }

  switch (node.type) {
    case "heading":
      return {
        type: "heading",
        level: node.attrs?.level,
        content: getTextContent(node, options),
        attrs: node.attrs, // Pass through all attributes (including textAlign)
      };

    case "paragraph":
      return {
        type: "paragraph",
        content: getTextContent(node, options),
      };

    case "blockquote":
      // Blockquote children are converted recursively; nulls are dropped.
      return {
        type: "blockquote",
        content:
          node.content
            ?.map((child) => createSequenceElement(child, options))
            .filter(Boolean) || [],
      };

    case "codeBlock": {
      const textContent = getTextContent(node, options);
      let parsedJson = null;

      if (options.parseCodeAsJson) {
        try {
          parsedJson = JSON.parse(textContent);
        } catch {
          // Deliberate best effort: invalid JSON leaves `parsed` as null
          // and the raw text stays available in `content`.
        }
      }

      return {
        type: "codeBlock",
        content: textContent,
        parsed: parsedJson,
      };
    }

    case "image":
      return {
        type: "image",
        src: node.attrs?.src,
        alt: node.attrs?.alt,
        role: node.attrs?.role,
      };

    case "bulletList":
    case "orderedList":
      return {
        type: "list",
        style: node.type === "bulletList" ? "bullet" : "ordered",
        items: processListItems(node, options),
      };

    case "listItem":
      return {
        type: "listItem",
        content: getTextContent(node, options),
      };

    case "horizontalRule":
      return {
        type: "divider",
      };

    // Custom TipTap elements
    case "card-group":
      return {
        type: "card-group",
        cards:
          node.content
            ?.filter((child) => child.type === "card" && !child.attrs?.hidden)
            .map((card) => ({
              ...card.attrs,
              type: "card",
            })) || [],
      };

    case "document-group":
      return {
        type: "document-group",
        documents:
          node.content
            ?.filter((child) => child.type === "document")
            .map((doc) => ({
              ...doc.attrs,
              type: "document",
            })) || [],
      };

    case "FormBlock": {
      // Form data can arrive as a JSON string or as an object.
      let formData = node.attrs?.data;
      if (typeof formData === "string") {
        try {
          formData = JSON.parse(formData);
        } catch {
          // Deliberate best effort: an unparsable string is passed on as is.
        }
      }

      return {
        type: "form",
        data: formData,
        attrs: node.attrs,
      };
    }

    case "text":
      // Bare text nodes produce no element of their own.
      return null;

    default:
      return {
        type: node.type,
        content: getTextContent(node, options),
      };
  }
}
302
+
/**
 * Render the inline children of a node as an HTML string.
 *
 * Text nodes are wrapped according to their marks, innermost to outermost:
 * textStyle (color) -> highlight -> bold -> italic -> link. `hardBreak`
 * children become `<br>`. Any other child type is reported through
 * console.warn and contributes nothing to the output.
 *
 * Link handling:
 *   - a link mark without an href is ignored (the text is emitted without
 *     an anchor) instead of throwing;
 *   - the `download` attribute is added when the href path ends in a known
 *     file extension; a query string or fragment after the file name does
 *     not defeat the detection;
 *   - double quotes in href/target are escaped so they cannot terminate
 *     the attribute value.
 *
 * NOTE(review): the text itself is inserted without HTML escaping, exactly
 * as before; confirm whether consumers expect raw or escaped text.
 *
 * @param {Object} node Node whose `content` array holds inline children
 * @param {Object} [options={}] Parsing options (not read by this function)
 * @returns {string} HTML string; "" when the node has no content
 */
function getTextContent(node, options = {}) {
  if (!node.content) return "";

  // Extensions that mark a link as a file download.
  const downloadableExtensions = new Set([
    "pdf",
    "doc",
    "docx",
    "xls",
    "xlsx",
    "ppt",
    "pptx",
    "jpg",
    "jpeg",
    "png",
    "webp",
    "gif",
    "svg",
    "mp4",
    "mp3",
    "wav",
    "mov",
    "zip",
  ]);

  const escapeAttribute = (value) => String(value).replace(/"/g, "&quot;");

  return node.content.reduce((html, child) => {
    const { type, marks = [], text } = child;

    if (type === "hardBreak") {
      return html + "<br>";
    }

    if (type !== "text") {
      console.warn(`unhandled text content type: ${type}`, child);
      return html;
    }

    const findMark = (markType) => marks.find((mark) => mark.type === markType);
    let styledText = text || "";

    // textStyle (color)
    const color = findMark("textStyle")?.attrs?.color;
    if (color) {
      styledText = `<span style="color: var(--${color})">${styledText}</span>`;
    }

    // highlight
    if (findMark("highlight")) {
      styledText = `<span style="background-color: var(--highlight)">${styledText}</span>`;
    }

    // bold
    if (findMark("bold")) {
      styledText = `<strong>${styledText}</strong>`;
    }

    // italic
    if (findMark("italic")) {
      styledText = `<em>${styledText}</em>`;
    }

    // link (outermost)
    const linkAttrs = findMark("link")?.attrs;
    if (linkAttrs?.href != null) {
      const href = String(linkAttrs.href);
      const target = linkAttrs.target || "_self";

      // Only the part before any "?" or "#" is inspected for an extension.
      const path = href.split(/[?#]/, 1)[0];
      const extension = path.split(".").pop().toLowerCase();
      const downloadAttr = downloadableExtensions.has(extension)
        ? " download"
        : "";

      styledText = `<a href="${escapeAttribute(href)}" target="${escapeAttribute(
        target
      )}"${downloadAttr}>${styledText}</a>`;
    }

    return html + styledText;
  }, "");
}
383
+
/**
 * Convert the `listItem` children of a list node into item descriptors.
 *
 * For each list item, children whose type ends in "List" are treated as
 * nested lists and flattened into `items` (recursively); every other child
 * is converted with createSequenceElement and collected in `content`.
 * Children that convert to null are dropped, matching the blockquote
 * handling. Both arrays are always present, even for an item without
 * content. Children of the list that are not `listItem` nodes are ignored.
 *
 * @param {Object} node List node (e.g. bulletList / orderedList)
 * @param {Object} [options={}] Parsing options, forwarded unchanged
 * @returns {Array<{content: Array, items: Array}>} One entry per list item
 */
function processListItems(node, options = {}) {
  const items = [];

  for (const item of node.content ?? []) {
    if (item.type !== "listItem") continue;

    const content = [];
    const nestedItems = [];

    // Single pass: split the children into nested lists and own content.
    for (const child of item.content ?? []) {
      if (child.type.endsWith("List")) {
        nestedItems.push(...processListItems(child, options));
        continue;
      }

      const element = createSequenceElement(child, options);
      if (element) {
        content.push(element);
      }
    }

    items.push({ content, items: nestedItems });
  }

  return items;
}
401
+
402
+ export { processSequence };
@@ -0,0 +1,53 @@
/**
 * Resolve the role of a node.
 *
 * Sources are consulted in order and the first truthy value wins:
 *   1. the node's own `attrs.role`
 *   2. `attrs.value` of the first mark whose type is "role"
 *   3. the type-based default from getDefaultRole
 *
 * @param {Object} node Node with potential role information
 * @returns {string|null} Role value or null
 */
function getRoleFromNode(node) {
  const attributeRole = node.attrs?.role;
  if (attributeRole) {
    return attributeRole;
  }

  const roleMark = node.marks?.find((mark) => mark.type === "role");
  const markRole = roleMark?.attrs?.value;
  if (markRole) {
    return markRole;
  }

  return getDefaultRole(node);
}
17
+
/**
 * Get the default role for a node, based solely on its type.
 *
 * @param {Object} node Node whose `type` is inspected
 * @returns {string|null} "content" for images, "link" for links,
 *   null for every other type
 */
function getDefaultRole(node) {
  const { type } = node;

  if (type === "image") {
    return "content";
  }
  if (type === "link") {
    return "link";
  }
  return null;
}
31
+
// Known roles per node type. A Map is used instead of a plain object so
// that inherited keys such as "constructor" or "__proto__" are never
// mistaken for a node type.
const VALID_ROLES_BY_TYPE = new Map([
  ["image", ["background", "content", "gallery", "icon"]],
  [
    "link",
    ["button", "button-primary", "button-outline", "nav-link", "footer-link"],
  ],
]);

/**
 * Validate if a role is known for a given type.
 *
 * @param {string} type Node type ("image" or "link" have known roles)
 * @param {string} role Role to check
 * @returns {boolean} true when `role` is listed for `type`; false for an
 *   unknown role or an unknown type
 */
function isValidRole(type, role) {
  return VALID_ROLES_BY_TYPE.get(type)?.includes(role) ?? false;
}
49
+
50
+ export {
51
+ getRoleFromNode,
52
+ isValidRole,
53
+ };