@uniweb/semantic-parser 1.1.5 → 1.1.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,402 +0,0 @@
1
/**
 * Process a ProseMirror/TipTap document into a flat sequence.
 *
 * The document is handed to `processNode`, which appends the elements it
 * produces to a fresh array.
 *
 * @param {Object} doc ProseMirror document (root node of type "doc")
 * @param {Object} options Parsing options, forwarded unchanged to `processNode`
 * @returns {Array} Sequence of content elements; empty when `doc` is null or
 *   undefined
 */
function processSequence(doc, options = {}) {
  const sequence = [];

  // A missing document has nothing to walk; return the empty sequence instead
  // of failing on `node.type` inside processNode.
  if (doc == null) {
    return sequence;
  }

  processNode(doc, sequence, options);

  return sequence;
}
13
-
14
/**
 * Walk a node and append the element it maps to onto `sequence`.
 *
 * The root "doc" node produces no element of its own; each of its children is
 * processed in turn. Any other node is converted with `createSequenceElement`
 * and pushed only when the conversion yields a truthy element.
 *
 * @param {Object} node ProseMirror node
 * @param {Array} sequence Output array, mutated in place
 * @param {Object} options Parsing options passed through to the converter
 */
function processNode(node, sequence, options) {
  const isRoot = node.type === "doc";

  if (!isRoot) {
    const converted = createSequenceElement(node, options);
    if (converted) {
      sequence.push(converted);
    }
    return;
  }

  // Root document: recurse into its children, if it has any.
  const visitChild = (child) => processNode(child, sequence, options);
  node.content?.forEach(visitChild);
}
28
-
29
/**
 * Convert a single ProseMirror/TipTap node into a sequence element.
 *
 * Paragraphs are inspected first so that "pure" content can be lifted out of
 * the paragraph wrapper for easier handling by later processors:
 *   - several parts all carrying the same link href -> "styledLink"
 *   - exactly one child carrying a link mark         -> "link"
 *   - exactly one image child, by `attrs.role`       -> "image" (role "image"
 *     or "banner"), "icon", or "video"
 *   - exactly one text child with a button mark      -> "button"
 * Everything else is mapped by `node.type` in the switch below.
 *
 * @param {Object} node ProseMirror node
 * @param {Object} [options] Parsing options; `parseCodeAsJson` enables
 *   JSON parsing of code block text
 * @returns {Object|null} The sequence element, or null for bare "text" nodes
 */
function createSequenceElement(node, options = {}) {
  const isInlineMedia = (child) =>
    child.type === "UniwebIcon" || child.type === "image";

  // Returns the link mark shared by every (non-icon, non-image) part of a
  // multi-part paragraph, or null when the paragraph is not a styled link.
  function findStyledLinkMark() {
    if (
      node.type !== "paragraph" ||
      !node.content ||
      !(node.content.length > 1)
    ) {
      return null;
    }

    // Icons and images are ignored when deciding whether the text is one link
    const parts = node.content.filter((child) => !isInlineMedia(child));
    if (parts.length === 0) return null;

    const firstLinkMark = parts[0]?.marks?.find(
      (mark) => mark.type === "link" && mark.attrs
    );
    if (!firstLinkMark) return null;

    const allHaveSameLink = parts.every((part) =>
      part?.marks?.some(
        (mark) =>
          mark.type === "link" &&
          mark.attrs?.href === firstLinkMark.attrs.href
      )
    );

    return allHaveSameLink ? firstLinkMark : null;
  }

  // Check styled link first (multi-part link)
  const styledLinkMark = findStyledLinkMark();
  if (styledLinkMark) {
    // Remove link marks from content, keep other styling
    const cleanedContent = node.content
      .filter((child) => !isInlineMedia(child))
      .map((child) => ({
        ...child,
        marks: child.marks?.filter((mark) => mark.type !== "link") || [],
      }));

    return {
      type: "styledLink",
      href: styledLinkMark.attrs.href,
      target: styledLinkMark.attrs.target || "_self",
      content: getTextContent({ ...node, content: cleanedContent }, options),
    };
  }

  // The single-child shortcuts only apply to a paragraph with exactly one
  // child. An empty paragraph may have no `content` at all, so the lookup is
  // guarded rather than reading `node.content.length` directly.
  const onlyChild =
    node.type === "paragraph" && node.content?.length === 1
      ? node.content[0]
      : null;

  if (onlyChild) {
    // Simple single-part link (checked before image/button, as a link mark wins)
    const linkMark = onlyChild.marks?.find((mark) => mark.type === "link");
    if (linkMark) {
      return {
        type: "link",
        content: {
          href: linkMark.attrs?.href,
          label: onlyChild.text,
        },
      };
    }

    if (onlyChild.type === "image") {
      // Tolerate an image node without attrs; it then matches no role and
      // falls through to the generic paragraph handling.
      const attrs = onlyChild.attrs ?? {};

      if (attrs.role === "image" || attrs.role === "banner") {
        return {
          type: "image",
          src: attrs.src,
          caption: attrs.title,
          alt: attrs.alt || attrs.title,
          role: attrs.role,
        };
      }

      if (attrs.role === "icon") {
        return {
          type: "icon",
          svg: attrs.svg,
        };
      }

      if (attrs.role === "video") {
        return {
          type: "video",
          src: attrs.src,
          caption: attrs.title,
          alt: attrs.alt || attrs.title,
        };
      }
    }

    if (onlyChild.type === "text") {
      const buttonMark = onlyChild.marks?.find(
        (mark) => mark.type === "button"
      );
      if (buttonMark) {
        return {
          type: "button",
          content: onlyChild.text,
          attrs: buttonMark.attrs,
        };
      }
    }
  }

  switch (node.type) {
    case "heading":
      return {
        type: "heading",
        level: node.attrs?.level,
        content: getTextContent(node, options),
        attrs: node.attrs, // Pass through all attributes (including textAlign)
      };

    case "paragraph":
      return {
        type: "paragraph",
        content: getTextContent(node, options),
      };

    case "blockquote":
      // Process blockquote content recursively
      return {
        type: "blockquote",
        content:
          node.content
            ?.map((child) => createSequenceElement(child, options))
            .filter(Boolean) || [],
      };

    case "codeBlock": {
      const textContent = getTextContent(node, options);
      let parsedJson = null;

      if (options.parseCodeAsJson) {
        try {
          parsedJson = JSON.parse(textContent);
        } catch (err) {
          // Invalid JSON is expected here; `parsed` stays null and the raw
          // text is still returned in `content`.
        }
      }

      return {
        type: "codeBlock",
        content: textContent,
        parsed: parsedJson,
      };
    }

    case "image":
      return {
        type: "image",
        src: node.attrs?.src,
        alt: node.attrs?.alt,
        role: node.attrs?.role,
      };

    case "bulletList":
    case "orderedList":
      return {
        type: "list",
        style: node.type === "bulletList" ? "bullet" : "ordered",
        items: processListItems(node, options),
      };

    case "listItem":
      return {
        type: "listItem",
        content: getTextContent(node, options),
      };

    case "horizontalRule":
      return {
        type: "divider",
      };

    // Custom TipTap elements
    case "card-group":
      return {
        type: "card-group",
        cards:
          node.content
            ?.filter((child) => child.type === "card" && !child.attrs?.hidden)
            .map((card) => ({
              ...card.attrs,
              type: "card",
            })) || [],
      };

    case "document-group":
      return {
        type: "document-group",
        documents:
          node.content
            ?.filter((child) => child.type === "document")
            .map((doc) => ({
              ...doc.attrs,
              type: "document",
            })) || [],
      };

    case "FormBlock": {
      // Form data can be a JSON string or an already-parsed object
      let formData = node.attrs?.data;
      if (typeof formData === "string") {
        try {
          formData = JSON.parse(formData);
        } catch (err) {
          // Not valid JSON: keep the original string as the form data
        }
      }
      return {
        type: "form",
        data: formData,
        attrs: node.attrs,
      };
    }

    case "text":
      // Bare text nodes are only meaningful inside a block; emit nothing
      return null;

    default:
      return {
        type: node.type,
        content: getTextContent(node, options),
      };
  }
}
302
-
303
// Extensions whose links are rendered with the `download` attribute.
// Built once instead of on every linked text node.
const FILE_LINK_EXTENSIONS = new Set([
  "pdf",
  "doc",
  "docx",
  "xls",
  "xlsx",
  "ppt",
  "pptx",
  "jpg",
  "jpeg",
  "png",
  "webp",
  "gif",
  "svg",
  "mp4",
  "mp3",
  "wav",
  "mov",
  "zip",
]);

/**
 * Render the inline children of a node as an HTML string.
 *
 * Text nodes are wrapped according to their marks, innermost first:
 * textStyle (color), highlight, bold, italic, then link as the outermost
 * wrapper. "hardBreak" children become `<br>`. Any other child type is
 * reported with `console.warn` and contributes nothing to the output.
 *
 * NOTE(review): the text itself is inserted verbatim (not HTML-escaped). The
 * "codeBlock" branch of createSequenceElement passes this output to
 * JSON.parse, so escaping text here would change that result — confirm where
 * text escaping should happen before adding it.
 *
 * @param {Object} node Node whose `content` array is rendered
 * @param {Object} [options] Parsing options; accepted for signature parity
 *   with the other processors but not read by this function
 * @returns {string} HTML string, or "" when the node has no content
 */
function getTextContent(node, options = {}) {
  if (!node.content) return "";

  // Values are placed inside double-quoted attributes, so a raw `"` would end
  // the attribute early and let the remainder be parsed as markup.
  const escapeAttribute = (value) => String(value).replace(/"/g, "&quot;");

  // Decide on the path only: a `?query` or `#fragment` suffix must not hide
  // the extension, and a value without any dot has no extension at all.
  const isFileHref = (href) => {
    const path = href.split(/[?#]/, 1)[0];
    const dotIndex = path.lastIndexOf(".");
    if (dotIndex === -1) return false;
    return FILE_LINK_EXTENSIONS.has(path.slice(dotIndex + 1).toLowerCase());
  };

  return node.content.reduce((html, child) => {
    const { type, text } = child;

    if (type === "hardBreak") {
      return html + "<br>";
    }

    if (type !== "text") {
      console.warn(`unhandled text content type: ${type}`, child);
      return html;
    }

    const marks = child.marks ?? [];
    const findMark = (markType) => marks.find((mark) => mark.type === markType);

    let styledText = text || "";

    // Apply marks in order: textStyle, highlight, bold, italic, link
    // This ensures proper nesting

    // textStyle (color)
    const color = findMark("textStyle")?.attrs?.color;
    if (color) {
      styledText = `<span style="color: var(--${escapeAttribute(
        color
      )})">${styledText}</span>`;
    }

    // highlight
    if (findMark("highlight")) {
      styledText = `<span style="background-color: var(--highlight)">${styledText}</span>`;
    }

    // bold
    if (findMark("bold")) {
      styledText = `<strong>${styledText}</strong>`;
    }

    // italic
    if (findMark("italic")) {
      styledText = `<em>${styledText}</em>`;
    }

    // link (outermost)
    const linkMark = findMark("link");
    if (linkMark) {
      // A link mark may lack attrs or href; render an empty href rather than
      // throwing while splitting an undefined value.
      const href = String(linkMark.attrs?.href ?? "");
      const target = linkMark.attrs?.target || "_self";

      // File links get the download attribute
      const downloadAttr = isFileHref(href) ? " download" : "";

      styledText = `<a href="${escapeAttribute(
        href
      )}" target="${escapeAttribute(target)}"${downloadAttr}>${styledText}</a>`;
    }

    return html + styledText;
  }, "");
}
383
-
384
/**
 * Convert the "listItem" children of a list node into item records.
 *
 * Each record has two fields:
 *   - `content`: the item's children whose type does not end in "List",
 *     each converted with `createSequenceElement`
 *   - `items`: the records of every nested list (children whose type ends in
 *     "List"), flattened into a single array
 * Both fields are undefined when the list item has no `content`.
 *
 * @param {Object} node List node whose `content` holds the list items
 * @param {Object} [options] Parsing options passed to the element converter
 * @returns {Array} One record per "listItem" child, in document order
 */
function processListItems(node, options = {}) {
  const isNestedList = (child) => child.type.endsWith("List");
  const listItems = (node.content ?? []).filter(
    (entry) => entry.type === "listItem"
  );

  return listItems.map(({ content: children }) => {
    const ownContent = children
      ?.filter((child) => !isNestedList(child))
      ?.map((child) => createSequenceElement(child, options));

    const nestedItems = children
      ?.filter((child) => isNestedList(child))
      .flatMap((list) => processListItems(list, options));

    return { content: ownContent, items: nestedItems };
  });
}
401
-
402
- export { processSequence };
@@ -1,129 +0,0 @@
1
/**
 * Organize content elements by their type while preserving context.
 *
 * Every element is copied with an added `context` (from `getElementContext`)
 * and filed under headings, paragraphs, images (bucketed by `role`, default
 * "content"), lists or dividers. Elements of any other type are only counted
 * towards the metadata. Helper methods are attached via
 * `addCollectionHelpers` before the result is returned.
 *
 * @param {Array} [sequence] Flat sequence of elements (defaults to empty)
 * @returns {Object} Content organized by type, plus `metadata` with
 *   `totalElements`, `dominantType` (most frequent element type, first one
 *   wins on ties, null for an empty sequence) and `hasMedia` (true once an
 *   image element was seen)
 */
function processByType(sequence = []) {
  const collections = {
    headings: [],
    paragraphs: [],
    images: {
      background: [],
      content: [],
      gallery: [],
      icon: [],
    },
    lists: [],
    dividers: [],
    metadata: {
      totalElements: sequence.length,
      dominantType: null,
      hasMedia: false,
    },
  };

  const hasOwn = Object.prototype.hasOwnProperty;

  // Track type frequencies for metadata
  const typeFrequency = new Map();

  sequence.forEach((element, index) => {
    // Track element type frequency
    typeFrequency.set(element.type, (typeFrequency.get(element.type) || 0) + 1);

    // Add context information
    const context = getElementContext(sequence, index);
    const enrichedElement = { ...element, context };

    // Process element based on type
    switch (element.type) {
      case "heading":
        collections.headings.push(enrichedElement);
        break;

      case "paragraph":
        collections.paragraphs.push(enrichedElement);
        break;

      case "image": {
        const role = element.role || "content";
        const { images } = collections;

        // The role comes from document data. Only an own property counts as
        // an existing bucket: a role such as "constructor" or "__proto__"
        // would otherwise resolve to an inherited member that has no push().
        // defineProperty creates the bucket without invoking the `__proto__`
        // setter.
        if (!hasOwn.call(images, role)) {
          Object.defineProperty(images, role, {
            value: [],
            writable: true,
            enumerable: true,
            configurable: true,
          });
        }
        images[role].push(enrichedElement);
        collections.metadata.hasMedia = true;
        break;
      }

      case "list":
        collections.lists.push(enrichedElement);
        break;

      case "divider":
        collections.dividers.push(enrichedElement);
        break;
    }
  });

  // Calculate dominant type
  let maxFrequency = 0;
  typeFrequency.forEach((frequency, type) => {
    if (frequency > maxFrequency) {
      maxFrequency = frequency;
      collections.metadata.dominantType = type;
    }
  });

  // Add helper methods
  addCollectionHelpers(collections);

  return collections;
}
80
-
81
/**
 * Describe where an element sits within the sequence.
 *
 * @param {Array} sequence Flat sequence of elements
 * @param {number} position Index of the element being described
 * @returns {Object} `{ position, previousElement, nextElement, nearestHeading }`
 *   where the neighbours are null at the ends of the sequence and
 *   `nearestHeading` is the closest preceding element of type "heading", or
 *   null when there is none
 */
function getElementContext(sequence, position) {
  let previousElement = null;
  if (position > 0) {
    previousElement = sequence[position - 1];
  }

  let nextElement = null;
  if (position < sequence.length - 1) {
    nextElement = sequence[position + 1];
  }

  // Walk backwards from the element until the first heading is met
  let nearestHeading = null;
  let cursor = position - 1;
  while (cursor >= 0 && nearestHeading === null) {
    const candidate = sequence[cursor];
    if (candidate.type === "heading") {
      nearestHeading = candidate;
    }
    cursor -= 1;
  }

  return { position, previousElement, nextElement, nearestHeading };
}
102
-
103
/**
 * Attach query helpers to a collections object (mutates it in place).
 *
 * Both helpers read their data through `this`, so they must be invoked as
 * methods of the collections object:
 *   - `getHeadingsByLevel(level)` returns the headings whose `level` equals
 *     the given value.
 *   - `getElementsByHeadingContext(headingFilter)` returns the paragraphs,
 *     images (all role buckets) and lists whose `context.nearestHeading` is
 *     set and accepted by `headingFilter`.
 *
 * @param {Object} collections Object with `headings`, `paragraphs`, `images`
 *   and `lists`
 */
function addCollectionHelpers(collections) {
  const helpers = {
    // Get headings of specific level
    getHeadingsByLevel: function (level) {
      const hasLevel = (heading) => heading.level === level;
      return this.headings.filter(hasLevel);
    },

    // Get elements by heading context
    getElementsByHeadingContext: function (headingFilter) {
      const imageElements = Object.values(this.images).flat();
      const candidates = [...this.paragraphs, ...imageElements, ...this.lists];

      return candidates.filter((candidate) => {
        const heading = candidate.context?.nearestHeading;
        return heading && headingFilter(heading);
      });
    },
  };

  Object.assign(collections, helpers);
}
126
-
127
- module.exports = {
128
- processByType,
129
- };