@portabletext/block-tools 4.0.2 → 4.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/lib/_chunks-es/helpers.js +1 -64
- package/lib/_chunks-es/helpers.js.map +1 -1
- package/lib/index.js +487 -38
- package/lib/index.js.map +1 -1
- package/package.json +9 -9
- package/src/HtmlDeserializer/helpers.ts +1 -183
- package/src/HtmlDeserializer/index.ts +14 -25
- package/src/HtmlDeserializer/preprocessors/index.ts +8 -6
- package/src/HtmlDeserializer/preprocessors/{gdocs.ts → preprocessor.gdocs.ts} +2 -22
- package/src/HtmlDeserializer/preprocessors/{html.ts → preprocessor.html.ts} +1 -1
- package/src/HtmlDeserializer/preprocessors/{notion.ts → preprocessor.notion.ts} +1 -1
- package/src/HtmlDeserializer/preprocessors/{whitespace.ts → preprocessor.whitespace.ts} +28 -3
- package/src/HtmlDeserializer/preprocessors/{word.ts → preprocessor.word.ts} +1 -1
- package/src/HtmlDeserializer/rules/index.ts +6 -4
- package/src/HtmlDeserializer/rules/{gdocs.ts → rules.gdocs.ts} +1 -1
- package/src/HtmlDeserializer/rules/{html.ts → rules.html.ts} +3 -3
- package/src/HtmlDeserializer/rules/{notion.ts → rules.notion.ts} +1 -1
- package/src/HtmlDeserializer/rules/rules.word.ts +95 -0
- package/src/HtmlDeserializer/trim-whitespace.ts +157 -0
- package/src/HtmlDeserializer/word-online/asserters.word-online.ts +153 -0
- package/src/HtmlDeserializer/word-online/preprocessor.word-online.ts +263 -0
- package/src/HtmlDeserializer/word-online/rules.word-online.ts +390 -0
- package/src/HtmlDeserializer/rules/word.ts +0 -59
- package/src/HtmlDeserializer/rules/{whitespace-text-node.ts → rules.whitespace-text-node.ts} +0 -0
package/lib/index.js
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
import { sanitySchemaToPortableTextSchema } from "@portabletext/sanity-bridge";
|
|
2
2
|
import { isTextBlock, isSpan } from "@portabletext/schema";
|
|
3
3
|
import flatten from "lodash/flatten.js";
|
|
4
|
-
import {
|
|
4
|
+
import { isElement, tagName, PRESERVE_WHITESPACE_TAGS, HTML_BLOCK_TAGS, HTML_HEADER_TAGS, DEFAULT_SPAN, DEFAULT_BLOCK, BLOCK_DEFAULT_STYLE, HTML_LIST_CONTAINER_TAGS, HTML_SPAN_TAGS, HTML_LIST_ITEM_TAGS, HTML_DECORATOR_TAGS, isMinimalSpan, defaultParseHtml, flattenNestedBlocks, ensureRootIsBlocks, resolveJsType, isPlaceholderDecorator, isPlaceholderAnnotation, isMinimalBlock, isNodeList } from "./_chunks-es/helpers.js";
|
|
5
5
|
import isEqual from "lodash/isEqual.js";
|
|
6
6
|
var s = { 0: 8203, 1: 8204, 2: 8205, 3: 8290, 4: 8291, 5: 8288, 6: 65279, 7: 8289, 8: 119155, 9: 119156, a: 119157, b: 119158, c: 119159, d: 119160, e: 119161, f: 119162 }, c = { 0: 8203, 1: 8204, 2: 8205, 3: 65279 };
|
|
7
7
|
new Array(4).fill(String.fromCodePoint(c[0])).join("");
|
|
@@ -28,13 +28,203 @@ for (let i = 0; i < 256; ++i)
|
|
|
28
28
|
/**
 * Build a hexadecimal key of exactly `length` characters.
 *
 * Each value produced by `whatwgRNG(length)` is translated through the
 * `byteToHex` lookup table and concatenated; the result is then cut down to
 * `length` characters.
 *
 * @param {number} length - Number of characters in the returned key.
 * @returns {string} The generated key.
 */
function randomKey(length) {
  let hex = "";
  // NOTE(review): assumes whatwgRNG returns an iterable of byte values
  // (array or typed array) — confirm against its definition.
  for (const byte of whatwgRNG(length)) {
    hex += byteToHex[byte];
  }
  return hex.slice(0, length);
}
|
|
31
|
+
/**
 * Detect whether an HTML string carries Word Online class-name markers.
 *
 * Two signatures are recognised:
 *  - a `class="` attribute starting with `TextRun` or `NormalTextRun` that
 *    later contains `SCXW<digits>` followed by `BCX<digits>`;
 *  - a `class="` attribute starting with `EOP` that later contains
 *    `SCXW<digits>`.
 *
 * @param {string} html - Raw HTML to inspect.
 * @returns {boolean} `true` when either signature is present.
 */
function isWordOnlineHtml(html) {
  const textRunSignature = /class="(?:TextRun|NormalTextRun)[^"]*SCXW\d+[^"]*BCX\d+/;
  if (textRunSignature.test(html)) {
    return true;
  }
  const endOfParagraphSignature = /class="EOP[^"]*SCXW\d+/;
  return endOfParagraphSignature.test(html);
}
|
|
34
|
+
/**
 * Check whether a node is a Word Online text run: a `<span>` element with the
 * `TextRun` class that is not also marked `EOP`.
 *
 * @param {*} el - Node to test.
 * @returns {boolean} `true` for a matching span, `false` otherwise.
 */
function isWordOnlineTextRun(el) {
  if (!isElement(el)) {
    return false;
  }
  if (tagName(el) !== "span") {
    return false;
  }
  const { classList } = el;
  return classList.contains("TextRun") && !classList.contains("EOP");
}
|
|
37
|
+
/**
 * Check whether a node is a `<span>` element carrying the `NormalTextRun`
 * class.
 *
 * @param {*} el - Node to test.
 * @returns {boolean} `true` for a matching span, `false` otherwise.
 */
function isNormalTextRun(el) {
  if (!isElement(el)) {
    return false;
  }
  if (tagName(el) !== "span") {
    return false;
  }
  return el.classList.contains("NormalTextRun");
}
|
|
40
|
+
/**
 * Check whether a node is an outer text-run span: a `<span>` element with the
 * `TextRun` class that has neither the `NormalTextRun` nor the `EOP` class.
 *
 * @param {*} el - Node to test.
 * @returns {boolean} `true` for a matching span, `false` otherwise.
 */
function isTextRunSpan(el) {
  if (!isElement(el)) {
    return false;
  }
  if (tagName(el) !== "span") {
    return false;
  }
  const { classList } = el;
  if (!classList.contains("TextRun")) {
    return false;
  }
  return !classList.contains("NormalTextRun") && !classList.contains("EOP");
}
|
|
43
|
+
/**
 * Check whether a node is a `<span>` element carrying the `FindHit` class.
 *
 * @param {*} el - Node to test.
 * @returns {boolean} `true` for a matching span, `false` otherwise.
 */
function isFindHit(el) {
  if (!isElement(el)) {
    return false;
  }
  if (tagName(el) !== "span") {
    return false;
  }
  return el.classList.contains("FindHit");
}
|
|
46
|
+
/**
 * Report whether `el` or any of its ancestors is a `word-online-block`
 * element whose `data-parastyle` attribute is `heading <digit>`.
 *
 * @param {*} el - Node to start from; the walk follows `parentNode`.
 * @returns {boolean} `true` when such a heading wrapper is found.
 */
function isInHeading(el) {
  const headingStyle = /^heading \d$/;
  for (let node = el; node; node = node.parentNode) {
    if (!isElement(node) || tagName(node) !== "word-online-block") {
      continue;
    }
    if (headingStyle.test(node.getAttribute("data-parastyle") ?? "")) {
      return true;
    }
  }
  return false;
}
|
|
55
|
+
/**
 * Report whether `el` or any of its ancestors is a `word-online-block`
 * element whose `data-parastyle` attribute equals `Quote`.
 *
 * @param {*} el - Node to start from; the walk follows `parentNode`.
 * @returns {boolean} `true` when such a quote wrapper is found.
 */
function isInBlockquote(el) {
  for (let node = el; node; node = node.parentNode) {
    if (!isElement(node) || tagName(node) !== "word-online-block") {
      continue;
    }
    if (node.getAttribute("data-parastyle") === "Quote") {
      return true;
    }
  }
  return false;
}
|
|
64
|
+
/**
 * Report whether an element is rendered bold: it either has the
 * `MacChromeBold` class or an inline style containing `font-weight: bold`.
 *
 * @param {Element} el - Element exposing `getAttribute` and `classList`.
 * @returns {boolean} `true` when bold formatting is detected.
 */
function hasStrongFormatting(el) {
  const inlineStyle = el.getAttribute("style") ?? "";
  if (el.classList.contains("MacChromeBold")) {
    return true;
  }
  return /font-weight\s*:\s*bold/.test(inlineStyle);
}
|
|
68
|
+
/**
 * Report whether an element's inline style contains `font-style: italic`.
 *
 * @param {Element} el - Element exposing `getAttribute`.
 * @returns {boolean} `true` when italic formatting is detected.
 */
function hasEmphasisFormatting(el) {
  const inlineStyle = el.getAttribute("style") ?? "";
  const italicDeclaration = /font-style\s*:\s*italic/;
  return italicDeclaration.test(inlineStyle);
}
|
|
72
|
+
/**
 * Report whether an element is underlined: it either has the `Underlined`
 * class or an inline style containing `text-decoration: underline`.
 *
 * @param {Element} el - Element exposing `getAttribute` and `classList`.
 * @returns {boolean} `true` when underline formatting is detected.
 */
function hasUnderlineFormatting(el) {
  const inlineStyle = el.getAttribute("style") ?? "";
  if (el.classList.contains("Underlined")) {
    return true;
  }
  return /text-decoration\s*:\s*underline/.test(inlineStyle);
}
|
|
76
|
+
/**
 * Report whether an element is struck through: it either has the
 * `Strikethrough` class or an inline style containing
 * `text-decoration: line-through`.
 *
 * @param {Element} el - Element exposing `getAttribute` and `classList`.
 * @returns {boolean} `true` when strikethrough formatting is detected.
 */
function hasStrikethroughFormatting(el) {
  const inlineStyle = el.getAttribute("style") ?? "";
  if (el.classList.contains("Strikethrough")) {
    return true;
  }
  return /text-decoration\s*:\s*line-through/.test(inlineStyle);
}
|
|
80
|
+
/**
 * Report whether an element carries any of the recognised inline formats
 * (strong, emphasis, underline or strikethrough).
 *
 * The checks run in that order and stop at the first one that matches.
 *
 * @param {Element} el - Element to inspect.
 * @returns {boolean} `true` when at least one format is detected.
 */
function hasFormatting(el) {
  const formatChecks = [
    hasStrongFormatting,
    hasEmphasisFormatting,
    hasUnderlineFormatting,
    hasStrikethroughFormatting,
  ];
  return formatChecks.some((check) => check(el));
}
|
|
83
|
+
/**
 * Rewrite a parsed Word Online clipboard document in place so the
 * deserializer rules can consume it.
 *
 * Nothing happens unless `isWordOnlineHtml(html)` recognises the markup.
 * Otherwise three passes run:
 *
 *  1. Every `p.Paragraph[role="heading"]` with an `aria-level` is replaced by
 *     a `<word-online-block data-parastyle="heading N">` holding its children.
 *  2. Runs of consecutive top-level body elements that share a paragraph
 *     style (see `getParaStyle`) are moved into a single
 *     `<word-online-block data-parastyle="...">` wrapper.
 *  3. Inside every NormalTextRun, nested whitespace-only `<span>` elements are
 *     removed and their text (non-breaking spaces converted to regular
 *     spaces) is folded into neighbouring text.
 *
 * @param {string} html - The HTML string the document was parsed from.
 * @param {Document} doc - The parsed document; mutated in place.
 * @returns {Document} The same `doc` instance.
 */
function preprocessWordOnline(html, doc) {
  if (!isWordOnlineHtml(html)) {
    return doc;
  }

  const TEXT_NODE = 3;
  const isTextNode = (node) => node.nodeType === TEXT_NODE;
  const isWhitespaceOnlySpan = (node) =>
    isElement(node) && tagName(node) === "span" && node.textContent.trim() === "";

  /** Create a `<word-online-block>` wrapper tagged with the paragraph style. */
  const createBlockWrapper = (paraStyle) => {
    const wrapper = doc.createElement("word-online-block");
    wrapper.setAttribute("data-parastyle", paraStyle);
    return wrapper;
  };

  /**
   * Try to hand a trailing space over to the following text run. This only
   * applies when the current run is formatted and the next one is not, so the
   * space does not inherit the formatting. Returns whether the space was
   * placed.
   */
  const moveSpaceToNextRun = (textRunSpan, spaceText) => {
    const nextSibling = textRunSpan.nextSibling;
    if (!nextSibling || !isElement(nextSibling) || !isTextRunSpan(nextSibling)) {
      return false;
    }
    if (!hasFormatting(textRunSpan) || hasFormatting(nextSibling)) {
      return false;
    }
    const nextNormalTextRun = Array.from(nextSibling.childNodes).find(isNormalTextRun);
    if (!nextNormalTextRun || !isElement(nextNormalTextRun)) {
      return false;
    }
    const firstChild = nextNormalTextRun.firstChild;
    if (firstChild && isTextNode(firstChild)) {
      firstChild.textContent = spaceText + (firstChild.textContent ?? "");
    } else {
      // insertBefore with a null reference node appends.
      nextNormalTextRun.insertBefore(doc.createTextNode(spaceText), firstChild);
    }
    return true;
  };

  // Pass 1: turn heading paragraphs into word-online-block wrappers.
  const headingParagraphs = Array.from(
    doc.querySelectorAll('p.Paragraph[role="heading"]')
  );
  for (const paragraph of headingParagraphs) {
    const ariaLevel = paragraph.getAttribute("aria-level");
    const parent = paragraph.parentNode;
    if (!ariaLevel || !parent) {
      continue;
    }
    const wrapper = createBlockWrapper(`heading ${ariaLevel}`);
    parent.insertBefore(wrapper, paragraph);
    while (paragraph.firstChild) {
      wrapper.appendChild(paragraph.firstChild);
    }
    parent.removeChild(paragraph);
  }

  // Pass 2: group consecutive top-level spans sharing a paragraph style.
  let child = doc.body.firstChild;
  while (child) {
    const next = child.nextSibling;
    const paraStyle =
      isElement(child) && tagName(child)?.includes("span")
        ? getParaStyle(child)
        : undefined;
    if (!paraStyle) {
      child = next;
      continue;
    }
    const group = [child];
    let sibling = next;
    while (sibling && isElement(sibling) && getParaStyle(sibling) === paraStyle) {
      group.push(sibling);
      sibling = sibling.nextSibling;
    }
    const wrapper = createBlockWrapper(paraStyle);
    doc.body.insertBefore(wrapper, child);
    for (const span of group) {
      wrapper.appendChild(span);
    }
    child = sibling;
  }

  // Pass 3: fold whitespace-only nested spans into the surrounding text.
  const textRunSpans = Array.from(doc.body.querySelectorAll("span")).filter(
    isTextRunSpan
  );
  for (const textRunSpan of textRunSpans) {
    const normalTextRuns = Array.from(textRunSpan.childNodes).filter(
      isNormalTextRun
    );
    for (const normalTextRun of normalTextRuns) {
      // Each iteration removes one nested span, so the loop terminates.
      for (;;) {
        const children = Array.from(normalTextRun.childNodes);
        const nestedSpanIndex = children.findIndex(isWhitespaceOnlySpan);
        if (nestedSpanIndex === -1) {
          break;
        }
        const nestedSpan = children[nestedSpanIndex];
        const spaceText = nestedSpan.textContent?.replace(/\u00a0/g, " ") ?? "";
        // The text node closest before the span is where a non-leading space
        // belongs. Using the first text node of the run would misplace the
        // space when the run holds several text nodes.
        const precedingTextNode = children
          .slice(0, nestedSpanIndex)
          .reverse()
          .find(isTextNode);
        normalTextRun.removeChild(nestedSpan);

        if (!precedingTextNode) {
          // Leading space: prepend it to the first remaining text node, or
          // create one at the very start of the run.
          const firstTextNode = Array.from(normalTextRun.childNodes).find(
            isTextNode
          );
          if (firstTextNode) {
            firstTextNode.textContent = spaceText + (firstTextNode.textContent || "");
          } else {
            normalTextRun.insertBefore(
              doc.createTextNode(spaceText),
              normalTextRun.firstChild
            );
          }
          continue;
        }

        // Non-leading space: prefer the unformatted next run; otherwise keep
        // it right where the span was, so the space is never lost.
        if (!moveSpaceToNextRun(textRunSpan, spaceText)) {
          precedingTextNode.textContent =
            (precedingTextNode.textContent ?? "") + spaceText;
        }
      }
    }
  }

  return doc;
}
|
|
205
|
+
/**
 * Resolve the Word Online paragraph style of an element.
 *
 * The element's own `data-ccp-parastyle` attribute wins. Failing that, a
 * `<span>` with the `TextRun` class inherits the style of its `.NormalTextRun`
 * descendants, but only when there is at least one and they all agree on the
 * same non-empty `data-ccp-parastyle` value.
 *
 * @param {Element} element - Element to inspect.
 * @returns {string|undefined} The paragraph style, or `undefined` when none
 *   can be determined.
 */
function getParaStyle(element) {
  const ownStyle = element.getAttribute("data-ccp-parastyle");
  if (ownStyle) {
    return ownStyle;
  }
  if (tagName(element) !== "span" || !element.classList.contains("TextRun")) {
    return undefined;
  }
  const runStyles = Array.from(
    element.querySelectorAll(".NormalTextRun"),
    (run) => run.getAttribute("data-ccp-parastyle")
  );
  if (runStyles.length === 0) {
    return undefined;
  }
  const [sharedStyle] = runStyles;
  if (!sharedStyle) {
    return undefined;
  }
  return runStyles.every((runStyle) => runStyle === sharedStyle)
    ? sharedStyle
    : undefined;
}
|
|
31
222
|
// Local table of numeric result-type codes used by the preprocessors below.
// NOTE(review): the names mirror the DOM `XPathResult` constants; presumably
// kept locally so no global `XPathResult` is needed — confirm.
const _XPathResult = {
  BOOLEAN_TYPE: 3,
  ORDERED_NODE_ITERATOR_TYPE: 5,
  UNORDERED_NODE_SNAPSHOT_TYPE: 6,
};
|
|
36
|
-
|
|
37
|
-
const whitespaceOnPasteMode = options?.unstable_whitespaceOnPasteMode || "preserve";
|
|
227
|
+
function preprocessGDocs(_html, doc) {
|
|
38
228
|
let gDocsRootOrSiblingNode = doc.evaluate(
|
|
39
229
|
'//*[@id and contains(@id, "docs-internal-guid")]',
|
|
40
230
|
doc,
|
|
@@ -44,14 +234,7 @@ var preprocessGDocs = (_html, doc, options) => {
|
|
|
44
234
|
).iterateNext();
|
|
45
235
|
if (gDocsRootOrSiblingNode) {
|
|
46
236
|
const isWrappedRootTag = tagName(gDocsRootOrSiblingNode) === "b";
|
|
47
|
-
|
|
48
|
-
case "normalize":
|
|
49
|
-
normalizeWhitespace(gDocsRootOrSiblingNode);
|
|
50
|
-
break;
|
|
51
|
-
case "remove":
|
|
52
|
-
removeAllWhitespace(gDocsRootOrSiblingNode);
|
|
53
|
-
break;
|
|
54
|
-
}
|
|
237
|
+
isWrappedRootTag || (gDocsRootOrSiblingNode = doc.body);
|
|
55
238
|
const childNodes = doc.evaluate(
|
|
56
239
|
"//*",
|
|
57
240
|
doc,
|
|
@@ -68,7 +251,7 @@ var preprocessGDocs = (_html, doc, options) => {
|
|
|
68
251
|
), doc;
|
|
69
252
|
}
|
|
70
253
|
return doc;
|
|
71
|
-
}
|
|
254
|
+
}
|
|
72
255
|
const unwantedWordDocumentPaths = [
|
|
73
256
|
"/html/text()",
|
|
74
257
|
"/html/head/text()",
|
|
@@ -82,7 +265,7 @@ const unwantedWordDocumentPaths = [
|
|
|
82
265
|
"//meta",
|
|
83
266
|
"//link"
|
|
84
267
|
];
|
|
85
|
-
|
|
268
|
+
function preprocessHTML(_html, doc) {
|
|
86
269
|
const bodyTextNodes = doc.evaluate(
|
|
87
270
|
"/html/body/text()",
|
|
88
271
|
doc,
|
|
@@ -110,7 +293,8 @@ var preprocessHTML = (_html, doc) => {
|
|
|
110
293
|
unwanted && unwanted.parentNode?.removeChild(unwanted);
|
|
111
294
|
}
|
|
112
295
|
return doc;
|
|
113
|
-
}
|
|
296
|
+
}
|
|
297
|
+
function preprocessNotion(html, doc) {
|
|
114
298
|
const NOTION_REGEX = /<!-- notionvc:.*?-->/g;
|
|
115
299
|
if (html.match(NOTION_REGEX)) {
|
|
116
300
|
const childNodes = doc.evaluate(
|
|
@@ -125,18 +309,30 @@ var preprocessHTML = (_html, doc) => {
|
|
|
125
309
|
return doc;
|
|
126
310
|
}
|
|
127
311
|
return doc;
|
|
128
|
-
}
|
|
312
|
+
}
|
|
313
|
+
// Elements whose direct text children are pure layout whitespace: a
// whitespace-only text node directly inside one of these is removed entirely.
const BLOCK_CONTAINER_ELEMENTS = [
  "body",
  "table",
  "tbody",
  "thead",
  "tfoot",
  "tr",
  "ul",
  "ol",
];

/**
 * Normalize whitespace in every text node below `doc.body`, in place.
 *
 * Text nodes whose parent tag is listed in `PRESERVE_WHITESPACE_TAGS` are left
 * untouched. For all others, runs of two or more whitespace characters and
 * runs of line breaks are each collapsed to a single space. If the result is
 * blank and the parent is one of `BLOCK_CONTAINER_ELEMENTS`, the text node is
 * removed instead.
 *
 * @param {string} _2 - Unused; present to match the preprocessor signature.
 * @param {Document} doc - Parsed document; mutated in place.
 * @returns {Document} The same `doc` instance.
 */
function preprocessWhitespace(_2, doc) {
  // DOM nodeType of a text node. Named explicitly instead of borrowing an
  // unrelated XPath result-type constant that merely shares the value 3.
  const TEXT_NODE = 3;

  function processNode(node) {
    if (node.nodeType !== TEXT_NODE) {
      // Walk backwards so removing a child does not shift unvisited indexes.
      for (let i = node.childNodes.length - 1; i >= 0; i--) {
        processNode(node.childNodes[i]);
      }
      return;
    }

    const parentTag = node.parentElement?.tagName.toLowerCase();
    if (PRESERVE_WHITESPACE_TAGS.includes(parentTag || "")) {
      return;
    }

    const normalized =
      node.textContent?.replace(/\s\s+/g, " ").replace(/[\r\n]+/g, " ") || "";
    const isBlankInContainer =
      Boolean(parentTag) &&
      BLOCK_CONTAINER_ELEMENTS.includes(parentTag) &&
      normalized.trim() === "";
    if (isBlankInContainer) {
      node.parentNode?.removeChild(node);
    } else {
      node.textContent = normalized;
    }
  }

  processNode(doc.body);
  return doc;
}
|
|
140
336
|
const WORD_HTML_REGEX = /(class="?Mso|style=(?:"|')[^"]*?\bmso-|w:WordDocument|<o:\w+>|<\/font>)/, unwantedPaths = [
|
|
141
337
|
"//o:p",
|
|
142
338
|
"//span[@style='mso-list:Ignore']",
|
|
@@ -160,7 +356,7 @@ const WORD_HTML_REGEX = /(class="?Mso|style=(?:"|')[^"]*?\bmso-|w:WordDocument|<
|
|
|
160
356
|
/**
 * Report whether an HTML string matches `WORD_HTML_REGEX`, the pattern of
 * markup signatures used to recognise Microsoft Word output.
 *
 * @param {string} html - Raw HTML to inspect.
 * @returns {boolean} `true` when the pattern matches.
 */
function isWordHtml(html) {
  const matchesWordMarkup = WORD_HTML_REGEX.test(html);
  return matchesWordMarkup;
}
|
|
163
|
-
|
|
359
|
+
function preprocessWord(html, doc) {
|
|
164
360
|
if (!isWordHtml(html))
|
|
165
361
|
return doc;
|
|
166
362
|
const unwantedNodes = doc.evaluate(
|
|
@@ -192,13 +388,201 @@ var preprocessWord = (html, doc) => {
|
|
|
192
388
|
}), child.appendChild(text), mappedElm?.parentNode?.replaceChild(parentElement, mappedElm);
|
|
193
389
|
}
|
|
194
390
|
return doc;
|
|
195
|
-
}
|
|
391
|
+
}
|
|
392
|
+
// Preprocessors applied to a parsed document. Each entry takes
// `(html, doc)` and mutates `doc`; the array order is the order of the list.
const preprocessors = [
  preprocessWhitespace,
  preprocessNotion,
  preprocessWord,
  preprocessWordOnline,
  preprocessGDocs,
  preprocessHTML,
];
|
|
400
|
+
/**
 * Translate a Word Online paragraph style name into a block style name that
 * exists in the schema.
 *
 * `heading 1` … `heading 6` map to `h1` … `h6`, `Quote` maps to `blockquote`,
 * and every other paragraph style maps to `normal`. The mapped name is only
 * returned when `schema.styles` contains a style with that name.
 *
 * The lookup uses a `Map` so that paragraph style names coming from pasted
 * HTML which collide with inherited object properties (`constructor`,
 * `toString`, `__proto__`, …) fall back to `normal` like any other unknown
 * style.
 *
 * @param {{styles: Array<{name: string}>}} schema - Schema listing the
 *   available block styles.
 * @param {string} paraStyle - Paragraph style name from the pasted markup.
 * @returns {string|undefined} The schema style name, or `undefined` when the
 *   schema does not define the mapped style.
 */
function mapParaStyleToBlockStyle(schema, paraStyle) {
  const blockStyleByParaStyle = new Map([
    ["heading 1", "h1"],
    ["heading 2", "h2"],
    ["heading 3", "h3"],
    ["heading 4", "h4"],
    ["heading 5", "h5"],
    ["heading 6", "h6"],
    ["Quote", "blockquote"],
  ]);
  const blockStyle = blockStyleByParaStyle.get(paraStyle) ?? "normal";
  return schema.styles.find((style) => style.name === blockStyle)?.name;
}
|
|
412
|
+
/**
 * Build the deserializer rules that understand Word Online markup.
 *
 * Rules, in order:
 *  1. bare `<img>` elements whose class contains `WACImage`;
 *  2. elements whose class contains `WACImageContainer` (image wrappers);
 *  3. `<li>` elements carrying `data-aria-level`;
 *  4. elements carrying a `data-parastyle` attribute that maps to a schema
 *     style (the `word-online-block` wrappers created by the preprocessor);
 *  5. Word Online text-run spans.
 *
 * Each rule's `deserialize` returns `undefined` when it does not apply.
 *
 * @param {object} schema - Schema with `block.name` and `styles`.
 * @param {object} options - Deserializer options; `matchers.image`,
 *   `matchers.inlineImage` and `keyGenerator` are read when present.
 * @returns {Array<{deserialize: Function}>} The rule list.
 */
function createWordOnlineRules(schema, options) {
  /** Context object handed to the image matchers. */
  const matcherContext = () => ({
    schema,
    keyGenerator: options.keyGenerator ?? keyGenerator,
  });

  /** Read `className` as a string, also supporting objects with `baseVal`. */
  const readClassName = (el) => {
    const raw = el.className;
    if (typeof raw === "string") {
      return raw;
    }
    if (raw && typeof raw === "object") {
      return raw.baseVal || "";
    }
    return "";
  };

  /** Collect all attributes of an image, with non-empty src/alt on top. */
  const imageProps = (img) => {
    const src = img.getAttribute("src") ?? undefined;
    const alt = img.getAttribute("alt") ?? undefined;
    return {
      ...Object.fromEntries(
        Array.from(img.attributes).map((attr) => [attr.name, attr.value])
      ),
      ...(src ? { src } : {}),
      ...(alt ? { alt } : {}),
    };
  };

  /** Run the block image matcher and wrap its result as a `__block`. */
  const matchBlockImage = (img) => {
    const image = options.matchers?.image?.({
      context: matcherContext(),
      props: imageProps(img),
    });
    return image ? { _type: "__block", block: image } : undefined;
  };

  return [
    // Image rule - handles bare Word Online <img> tags with WACImage class
    {
      deserialize(el) {
        if (!isElement(el) || tagName(el) !== "img") {
          return undefined;
        }
        if (!readClassName(el).includes("WACImage")) {
          return undefined;
        }
        return matchBlockImage(el);
      },
    },
    // Image rule - handles Word Online images wrapped in WACImageContainer
    {
      deserialize(el) {
        if (!isElement(el) || !readClassName(el).includes("WACImageContainer")) {
          return undefined;
        }
        const img = el.querySelector("img");
        if (!img) {
          return undefined;
        }
        // Outside a paragraph, or inside a list item, the inline image
        // matcher gets the first chance; the block matcher is the fallback.
        const isInsideListItem = el.closest("li") !== null;
        if (el.closest("p") === null || isInsideListItem) {
          const inlineImage = options.matchers?.inlineImage?.({
            context: matcherContext(),
            props: imageProps(img),
          });
          if (inlineImage) {
            return inlineImage;
          }
        }
        return matchBlockImage(img);
      },
    },
    // List item rule - handles <li> elements with aria-level
    {
      deserialize(el, next) {
        if (!isElement(el) || tagName(el) !== "li") {
          return undefined;
        }
        const ariaLevel = el.getAttribute("data-aria-level");
        if (!ariaLevel) {
          return undefined;
        }
        const listItem = tagName(el.parentNode) === "ol" ? "number" : "bullet";
        let childNodesToProcess = el.childNodes;
        let blockStyle = "normal";

        // A single block-level child is unwrapped so its contents become the
        // list item's children; a word-online-block child also supplies the
        // block style.
        const onlyChild = el.childNodes.length === 1 ? el.firstChild : null;
        if (onlyChild && isElement(onlyChild)) {
          const childTag = tagName(onlyChild);
          const isWordOnlineBlock = childTag === "word-online-block";
          if (
            childTag &&
            (HTML_BLOCK_TAGS[childTag] || HTML_HEADER_TAGS[childTag] || isWordOnlineBlock)
          ) {
            if (isWordOnlineBlock) {
              const paraStyle = onlyChild.getAttribute("data-parastyle");
              const foundBlockStyle = paraStyle
                ? mapParaStyleToBlockStyle(schema, paraStyle)
                : undefined;
              if (foundBlockStyle) {
                blockStyle = foundBlockStyle;
              }
            }
            childNodesToProcess = onlyChild.childNodes;
          }
        }

        const children = next(childNodesToProcess);
        let childArray = Array.isArray(children)
          ? children
          : [children].filter(Boolean);

        // Strip trailing whitespace: drop trailing children whose text is
        // blank, then right-trim the last child that has text.
        while (childArray.length > 0) {
          const lastChild = childArray[childArray.length - 1];
          if (!lastChild || typeof lastChild !== "object" || !("text" in lastChild)) {
            break;
          }
          const trimmed = lastChild.text.trimEnd();
          if (trimmed !== "") {
            if (trimmed !== lastChild.text) {
              lastChild.text = trimmed;
            }
            break;
          }
          childArray = childArray.slice(0, -1);
        }

        return {
          _type: schema.block.name,
          children: childArray,
          markDefs: [],
          style: blockStyle,
          listItem,
          // Fall back to level 1 when the attribute is not a usable number,
          // matching getListItemLevel's `|| 1` fallback.
          level: Number.parseInt(ariaLevel, 10) || 1,
        };
      },
    },
    // Block style rule - handles paragraph styles like Quote
    // The preprocessor wraps grouped NormalTextRun spans in a word-online-block element
    {
      deserialize(el, next) {
        if (!isElement(el)) {
          return undefined;
        }
        const paraStyle = el.getAttribute("data-parastyle");
        const blockStyle = paraStyle
          ? mapParaStyleToBlockStyle(schema, paraStyle)
          : undefined;
        if (!blockStyle) {
          return undefined;
        }
        const children = next(el.childNodes);
        let blockChildren = [];
        if (Array.isArray(children)) {
          blockChildren = children;
        } else if (children) {
          blockChildren = [children];
        }
        return {
          _type: schema.block.name,
          style: blockStyle,
          markDefs: [],
          children: blockChildren,
        };
      },
    },
    // TextRun rule
    {
      deserialize(el) {
        if (!isWordOnlineTextRun(el) || !el.textContent) {
          return undefined;
        }
        // Only text inside NormalTextRun / FindHit child spans is kept.
        const text = Array.from(el.childNodes)
          .filter((node) => isNormalTextRun(node) || isFindHit(node))
          .map((node) => (isElement(node) ? node.textContent ?? "" : ""))
          .join("");
        if (!text) {
          return undefined;
        }
        const span = {
          ...DEFAULT_SPAN,
          marks: [],
          text,
        };
        if (hasStrongFormatting(el)) {
          span.marks.push("strong");
        }
        // Italics inside heading and quote wrappers are not emitted as marks.
        if (hasEmphasisFormatting(el) && !isInHeading(el) && !isInBlockquote(el)) {
          span.marks.push("em");
        }
        if (hasUnderlineFormatting(el)) {
          const linkElement =
            el.parentElement && tagName(el.parentElement) === "a"
              ? el.parentElement
              : null;
          if (!linkElement) {
            span.marks.push("underline");
          } else {
            // Inside a link the underline only counts when a sibling of the
            // link is underlined as well.
            const prevSibling = linkElement.previousSibling;
            const nextSibling = linkElement.nextSibling;
            const hasPrevUnderline =
              prevSibling && isElement(prevSibling) && hasUnderlineFormatting(prevSibling);
            const hasNextUnderline =
              nextSibling && isElement(nextSibling) && hasUnderlineFormatting(nextSibling);
            if (hasPrevUnderline || hasNextUnderline) {
              span.marks.push("underline");
            }
          }
        }
        if (hasStrikethroughFormatting(el)) {
          span.marks.push("strike-through");
        }
        return span;
      },
    },
  ];
}
|
|
202
586
|
const LIST_CONTAINER_TAGS = Object.keys(HTML_LIST_CONTAINER_TAGS);
|
|
203
587
|
function isEmphasis$1(el) {
|
|
204
588
|
const style = isElement(el) && el.getAttribute("style");
|
|
@@ -572,7 +956,7 @@ function createNotionRules() {
|
|
|
572
956
|
/**
 * Derive the list item type from an element's inline `style` attribute.
 *
 * Only styles containing `lfo<digits>` are considered. Among those, a style
 * containing the substring `lfo1` yields `"number"` (so `lfo10` also counts);
 * everything else yields `"bullet"`.
 *
 * @param {*} el - Node to inspect.
 * @returns {"number"|"bullet"|undefined} The list item type, or `undefined`
 *   when `el` is not an element or its style has no `lfo` marker.
 */
function getListItemStyle(el) {
  const style = isElement(el) && el.getAttribute("style");
  if (!style || !style.match(/lfo\d+/)) {
    return undefined;
  }
  return style.match("lfo1") ? "number" : "bullet";
}
|
|
577
961
|
function getListItemLevel(el) {
|
|
578
962
|
const style = isElement(el) && el.getAttribute("style");
|
|
@@ -585,20 +969,33 @@ function getListItemLevel(el) {
|
|
|
585
969
|
return (level ? Number.parseInt(level, 10) : 1) || 1;
|
|
586
970
|
}
|
|
587
971
|
/**
 * Report whether a node is a Microsoft Word list paragraph.
 *
 * An element qualifies when its `className` is exactly one of the
 * `MsoListParagraphCxSp{First,Middle,Last}` classes, or when its inline style
 * contains an `mso-list: l<n> level<n> lfo<n>` declaration.
 *
 * @param {*} el - Node to inspect.
 * @returns {boolean} `true` when the node is a Word list element.
 */
function isWordListElement(el) {
  if (!isElement(el)) {
    return false;
  }
  const listParagraphClasses = [
    "MsoListParagraphCxSpFirst",
    "MsoListParagraphCxSpMiddle",
    "MsoListParagraphCxSpLast",
  ];
  if (listParagraphClasses.includes(el.className)) {
    return true;
  }
  const style = el.getAttribute("style");
  return Boolean(style) && /mso-list:\s*l\d+\s+level\d+\s+lfo\d+/.test(style);
}
|
|
979
|
+
/**
 * Look up the block style registered for a heading element.
 *
 * @param {*} el - Node whose tag name is looked up in `HTML_HEADER_TAGS`.
 * @returns {string|undefined} The `style` of the matching header entry, or
 *   `undefined` when the tag has no entry.
 */
function getHeadingStyle(el) {
  const tag = tagName(el);
  const headerEntry = tag ? HTML_HEADER_TAGS[tag] : undefined;
  return headerEntry ? headerEntry.style : undefined;
}
|
|
590
984
|
function createWordRules() {
|
|
591
985
|
return [
|
|
592
986
|
{
|
|
593
987
|
deserialize(el, next) {
|
|
594
|
-
|
|
988
|
+
const tag = tagName(el);
|
|
989
|
+
if ((tag === "p" || HTML_HEADER_TAGS[tag || ""]) && isWordListElement(el)) {
|
|
990
|
+
const headingStyle = getHeadingStyle(el);
|
|
595
991
|
return {
|
|
596
992
|
...DEFAULT_BLOCK,
|
|
597
993
|
listItem: getListItemStyle(el),
|
|
598
994
|
level: getListItemLevel(el),
|
|
599
|
-
style: BLOCK_DEFAULT_STYLE,
|
|
995
|
+
style: headingStyle || BLOCK_DEFAULT_STYLE,
|
|
600
996
|
children: next(el.childNodes)
|
|
601
997
|
};
|
|
998
|
+
}
|
|
602
999
|
}
|
|
603
1000
|
}
|
|
604
1001
|
];
|
|
@@ -606,16 +1003,71 @@ function createWordRules() {
|
|
|
606
1003
|
/**
 * Assemble the full list of deserializer rules, concatenated in this order:
 * Word, Word Online, Notion, Google Docs, then generic HTML.
 *
 * @param {object} schema - Schema passed to the schema-aware rule factories.
 * @param {object} options - Options passed to the Word Online and HTML rule
 *   factories.
 * @returns {Array<object>} The combined rule list.
 */
function createRules(schema, options) {
  const wordRules = createWordRules();
  const wordOnlineRules = createWordOnlineRules(schema, options);
  const notionRules = createNotionRules();
  const gDocsRules = createGDocsRules(schema);
  const htmlRules = createHTMLRules(schema, options);
  return [
    ...wordRules,
    ...wordOnlineRules,
    ...notionRules,
    ...gDocsRules,
    ...htmlRules,
  ];
}
|
|
1012
|
+
/**
 * Trim whitespace inside text blocks and filter empty text blocks according
 * to `mode`.
 *
 * Every text block is first passed through `trimTextBlockWhitespace`,
 * whatever the mode. Then:
 *  - `"preserve"` keeps every block;
 *  - `"remove"` drops every empty text block;
 *  - `"normalize"` keeps only the first block of each run of consecutive
 *    empty text blocks.
 * For any other mode no block is emitted.
 *
 * @param {object} context - Context forwarded to `isTextBlock` and
 *   `isEmptyTextBlock`.
 * @param {string} mode - Whitespace handling mode.
 * @param {Array<object>} blocks2 - Blocks to process.
 * @returns {Array<object>} The resulting blocks.
 */
function trimWhitespace(context, mode, blocks2) {
  const result = [];
  let previousWasEmpty = false;
  for (const block of blocks2) {
    const candidate = isTextBlock(context, block)
      ? trimTextBlockWhitespace(block)
      : block;
    switch (mode) {
      case "preserve":
        result.push(candidate);
        break;
      case "remove":
        if (!isEmptyTextBlock(context, candidate)) {
          result.push(candidate);
        }
        break;
      case "normalize": {
        const isEmpty = isEmptyTextBlock(context, candidate);
        if (!isEmpty || !previousWasEmpty) {
          result.push(candidate);
        }
        previousWasEmpty = isEmpty;
        break;
      }
      default:
        break;
    }
  }
  return result;
}
|
|
1037
|
+
/**
 * Report whether a block is a text block whose children are all spans with
 * blank (whitespace-only or empty) text. A text block without children
 * counts as empty.
 *
 * @param {object} context - Context forwarded to `isTextBlock` and `isSpan`.
 * @param {object} block - Block to inspect.
 * @returns {boolean} `true` when the block is an empty text block.
 */
function isEmptyTextBlock(context, block) {
  if (!isTextBlock(context, block)) {
    return false;
  }
  return block.children.every(
    (child) => isSpan(context, child) && child.text.trim() === ""
  );
}
|
|
1042
|
+
/**
 * Trim redundant whitespace from the spans of a text block, in place.
 *
 * For every child that `isMinimalSpan` accepts, in order:
 *  1. leading non-newline whitespace is stripped from the first child;
 *  2. trailing non-newline whitespace is stripped from the last child;
 *  3. trailing whitespace is stripped when the next span starts with
 *     whitespace, and leading whitespace when the previous span ends with
 *     whitespace;
 *  4. a span left with empty text is removed;
 *  5. a span that is exactly one space or one non-breaking space is merged
 *     into the previous span (preferred) or the next span when their marks
 *     are equal.
 *
 * `block.children` is spliced while being iterated and the running index is
 * still incremented after a removal.
 *
 * @param {{children: Array<object>}} block - Text block to trim; mutated.
 * @returns {object} The same `block` instance.
 */
function trimTextBlockWhitespace(block) {
  let index = 0;
  for (const child of block.children) {
    if (!isMinimalSpan(child)) {
      index++;
      continue;
    }
    const nextChild = nextSpan(block, index);
    const prevChild = prevSpan(block, index);

    if (index === 0) {
      child.text = child.text.replace(/^[^\S\n]+/g, "");
    }
    if (index === block.children.length - 1) {
      child.text = child.text.replace(/[^\S\n]+$/g, "");
    }

    const endsWithSpace = /\s/.test(child.text.slice(-1));
    if (
      endsWithSpace &&
      nextChild &&
      isMinimalSpan(nextChild) &&
      /\s/.test(nextChild.text.slice(0, 1))
    ) {
      child.text = child.text.replace(/[^\S\n]+$/g, "");
    }

    const startsWithSpace = /\s/.test(child.text.slice(0, 1));
    if (
      startsWithSpace &&
      prevChild &&
      isMinimalSpan(prevChild) &&
      /\s/.test(prevChild.text.slice(-1))
    ) {
      child.text = child.text.replace(/^[^\S\n]+/g, "");
    }

    if (!child.text) {
      block.children.splice(index, 1);
    }

    if (
      prevChild &&
      isEqual(prevChild.marks, child.marks) &&
      isWhiteSpaceChar(child.text)
    ) {
      prevChild.text += " ";
      block.children.splice(index, 1);
    } else if (
      nextChild &&
      isEqual(nextChild.marks, child.marks) &&
      isWhiteSpaceChar(child.text)
    ) {
      nextChild.text = ` ${nextChild.text}`;
      block.children.splice(index, 1);
    }

    index++;
  }
  return block;
}
|
|
1054
|
+
/**
 * Return the child right after `index` when it is a span.
 *
 * @param {{children: Array<object>}} block - Block whose children are read.
 * @param {number} index - Position of the current child.
 * @returns {object|null} The following child if its `_type` is `"span"`,
 *   otherwise `null`.
 */
function nextSpan(block, index) {
  const candidate = block.children[index + 1];
  if (candidate && candidate._type === "span") {
    return candidate;
  }
  return null;
}
|
|
1058
|
+
/**
 * Return the child right before `index` when it is a span.
 *
 * @param {{children: Array<object>}} block - Block whose children are read.
 * @param {number} index - Position of the current child.
 * @returns {object|null} The preceding child if its `_type` is `"span"`,
 *   otherwise `null`.
 */
function prevSpan(block, index) {
  const candidate = block.children[index - 1];
  if (candidate && candidate._type === "span") {
    return candidate;
  }
  return null;
}
|
|
1062
|
+
/**
 * Report whether `text` is exactly one regular space or exactly one
 * non-breaking space (U+00A0).
 *
 * @param {*} text - Value to test.
 * @returns {boolean} `true` only for those two single-character strings.
 */
function isWhiteSpaceChar(text) {
  return text === "\xA0" || text === " ";
}
|
|
614
1065
|
class HtmlDeserializer {
|
|
615
1066
|
keyGenerator;
|
|
616
1067
|
schema;
|
|
617
1068
|
rules;
|
|
618
1069
|
parseHtml;
|
|
1070
|
+
whitespaceMode;
|
|
619
1071
|
_markDefs = [];
|
|
620
1072
|
/**
|
|
621
1073
|
* Create a new serializer respecting a Sanity block content type's schema
|
|
@@ -628,9 +1080,14 @@ class HtmlDeserializer {
|
|
|
628
1080
|
keyGenerator: options.keyGenerator,
|
|
629
1081
|
matchers: options.matchers
|
|
630
1082
|
});
|
|
631
|
-
this.schema = schema, this.keyGenerator = options.keyGenerator ?? keyGenerator, this.rules = [...rules, ...standardRules];
|
|
1083
|
+
this.schema = schema, this.keyGenerator = options.keyGenerator ?? keyGenerator, this.rules = [...rules, ...standardRules], this.whitespaceMode = unstable_whitespaceOnPasteMode;
|
|
632
1084
|
const parseHtml = options.parseHtml || defaultParseHtml();
|
|
633
|
-
this.parseHtml = (html) =>
|
|
1085
|
+
this.parseHtml = (html) => {
|
|
1086
|
+
const cleanHTML = O(html), doc = parseHtml(cleanHTML);
|
|
1087
|
+
for (const processor of preprocessors)
|
|
1088
|
+
processor(cleanHTML, doc);
|
|
1089
|
+
return doc.body;
|
|
1090
|
+
};
|
|
634
1091
|
}
|
|
635
1092
|
/**
|
|
636
1093
|
* Deserialize HTML.
|
|
@@ -641,7 +1098,8 @@ class HtmlDeserializer {
|
|
|
641
1098
|
deserialize = (html) => {
|
|
642
1099
|
this._markDefs = [];
|
|
643
1100
|
const { parseHtml } = this, fragment = parseHtml(html), children = Array.from(fragment.childNodes), blocks2 = trimWhitespace(
|
|
644
|
-
this.schema,
|
|
1101
|
+
{ schema: this.schema },
|
|
1102
|
+
this.whitespaceMode,
|
|
645
1103
|
flattenNestedBlocks(
|
|
646
1104
|
{ schema: this.schema },
|
|
647
1105
|
ensureRootIsBlocks(
|
|
@@ -769,15 +1227,6 @@ class HtmlDeserializer {
|
|
|
769
1227
|
}, []);
|
|
770
1228
|
};
|
|
771
1229
|
}
|
|
772
|
-
function preprocess(html, parseHtml, options) {
|
|
773
|
-
const cleanHTML = O(html), doc = parseHtml(normalizeHtmlBeforePreprocess(cleanHTML));
|
|
774
|
-
return preprocessors.forEach((processor) => {
|
|
775
|
-
processor(cleanHTML, doc, options);
|
|
776
|
-
}), doc;
|
|
777
|
-
}
|
|
778
|
-
function normalizeHtmlBeforePreprocess(html) {
|
|
779
|
-
return html.trim();
|
|
780
|
-
}
|
|
781
1230
|
function normalizeBlock(node, options = {}) {
|
|
782
1231
|
const schema = {
|
|
783
1232
|
block: {
|