@portabletext/block-tools 4.0.2 → 4.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (24)
  1. package/lib/_chunks-es/helpers.js +1 -64
  2. package/lib/_chunks-es/helpers.js.map +1 -1
  3. package/lib/index.js +487 -38
  4. package/lib/index.js.map +1 -1
  5. package/package.json +9 -9
  6. package/src/HtmlDeserializer/helpers.ts +1 -183
  7. package/src/HtmlDeserializer/index.ts +14 -25
  8. package/src/HtmlDeserializer/preprocessors/index.ts +8 -6
  9. package/src/HtmlDeserializer/preprocessors/{gdocs.ts → preprocessor.gdocs.ts} +2 -22
  10. package/src/HtmlDeserializer/preprocessors/{html.ts → preprocessor.html.ts} +1 -1
  11. package/src/HtmlDeserializer/preprocessors/{notion.ts → preprocessor.notion.ts} +1 -1
  12. package/src/HtmlDeserializer/preprocessors/{whitespace.ts → preprocessor.whitespace.ts} +28 -3
  13. package/src/HtmlDeserializer/preprocessors/{word.ts → preprocessor.word.ts} +1 -1
  14. package/src/HtmlDeserializer/rules/index.ts +6 -4
  15. package/src/HtmlDeserializer/rules/{gdocs.ts → rules.gdocs.ts} +1 -1
  16. package/src/HtmlDeserializer/rules/{html.ts → rules.html.ts} +3 -3
  17. package/src/HtmlDeserializer/rules/{notion.ts → rules.notion.ts} +1 -1
  18. package/src/HtmlDeserializer/rules/rules.word.ts +95 -0
  19. package/src/HtmlDeserializer/trim-whitespace.ts +157 -0
  20. package/src/HtmlDeserializer/word-online/asserters.word-online.ts +153 -0
  21. package/src/HtmlDeserializer/word-online/preprocessor.word-online.ts +263 -0
  22. package/src/HtmlDeserializer/word-online/rules.word-online.ts +390 -0
  23. package/src/HtmlDeserializer/rules/word.ts +0 -59
  24. package/src/HtmlDeserializer/rules/{whitespace-text-node.ts → rules.whitespace-text-node.ts} +0 -0
package/lib/index.js CHANGED
@@ -1,7 +1,7 @@
1
1
  import { sanitySchemaToPortableTextSchema } from "@portabletext/sanity-bridge";
2
2
  import { isTextBlock, isSpan } from "@portabletext/schema";
3
3
  import flatten from "lodash/flatten.js";
4
- import { tagName, removeAllWhitespace, normalizeWhitespace, PRESERVE_WHITESPACE_TAGS, isElement, DEFAULT_SPAN, DEFAULT_BLOCK, HTML_HEADER_TAGS, HTML_BLOCK_TAGS, BLOCK_DEFAULT_STYLE, HTML_LIST_CONTAINER_TAGS, HTML_SPAN_TAGS, HTML_LIST_ITEM_TAGS, HTML_DECORATOR_TAGS, defaultParseHtml, trimWhitespace, flattenNestedBlocks, ensureRootIsBlocks, resolveJsType, isPlaceholderDecorator, isPlaceholderAnnotation, isMinimalBlock, isMinimalSpan, isNodeList } from "./_chunks-es/helpers.js";
4
+ import { isElement, tagName, PRESERVE_WHITESPACE_TAGS, HTML_BLOCK_TAGS, HTML_HEADER_TAGS, DEFAULT_SPAN, DEFAULT_BLOCK, BLOCK_DEFAULT_STYLE, HTML_LIST_CONTAINER_TAGS, HTML_SPAN_TAGS, HTML_LIST_ITEM_TAGS, HTML_DECORATOR_TAGS, isMinimalSpan, defaultParseHtml, flattenNestedBlocks, ensureRootIsBlocks, resolveJsType, isPlaceholderDecorator, isPlaceholderAnnotation, isMinimalBlock, isNodeList } from "./_chunks-es/helpers.js";
5
5
  import isEqual from "lodash/isEqual.js";
6
6
  var s = { 0: 8203, 1: 8204, 2: 8205, 3: 8290, 4: 8291, 5: 8288, 6: 65279, 7: 8289, 8: 119155, 9: 119156, a: 119157, b: 119158, c: 119159, d: 119160, e: 119161, f: 119162 }, c = { 0: 8203, 1: 8204, 2: 8205, 3: 65279 };
7
7
  new Array(4).fill(String.fromCodePoint(c[0])).join("");
@@ -28,13 +28,203 @@ for (let i = 0; i < 256; ++i)
28
28
  function randomKey(length) {
29
29
  return whatwgRNG(length).reduce((str, n) => str + byteToHex[n], "").slice(0, length);
30
30
  }
31
+ function isWordOnlineHtml(html) {
32
+ return /class="(?:TextRun|NormalTextRun)[^"]*SCXW\d+[^"]*BCX\d+/.test(html) || /class="EOP[^"]*SCXW\d+/.test(html);
33
+ }
34
+ function isWordOnlineTextRun(el) {
35
+ return !isElement(el) || tagName(el) !== "span" ? !1 : el.classList.contains("TextRun") && !el.classList.contains("EOP");
36
+ }
37
+ function isNormalTextRun(el) {
38
+ return !isElement(el) || tagName(el) !== "span" ? !1 : el.classList.contains("NormalTextRun");
39
+ }
40
+ function isTextRunSpan(el) {
41
+ return !isElement(el) || tagName(el) !== "span" ? !1 : el.classList.contains("TextRun") && !el.classList.contains("NormalTextRun") && !el.classList.contains("EOP");
42
+ }
43
+ function isFindHit(el) {
44
+ return !isElement(el) || tagName(el) !== "span" ? !1 : el.classList.contains("FindHit");
45
+ }
46
+ function isInHeading(el) {
47
+ let current = el;
48
+ for (; current; ) {
49
+ if (isElement(current) && tagName(current) === "word-online-block" && /^heading \d$/.test(current.getAttribute("data-parastyle") ?? ""))
50
+ return !0;
51
+ current = current.parentNode;
52
+ }
53
+ return !1;
54
+ }
55
+ function isInBlockquote(el) {
56
+ let current = el;
57
+ for (; current; ) {
58
+ if (isElement(current) && tagName(current) === "word-online-block" && current.getAttribute("data-parastyle") === "Quote")
59
+ return !0;
60
+ current = current.parentNode;
61
+ }
62
+ return !1;
63
+ }
64
+ function hasStrongFormatting(el) {
65
+ const style = el.getAttribute("style") ?? "";
66
+ return el.classList.contains("MacChromeBold") || /font-weight\s*:\s*bold/.test(style);
67
+ }
68
+ function hasEmphasisFormatting(el) {
69
+ const style = el.getAttribute("style") ?? "";
70
+ return /font-style\s*:\s*italic/.test(style);
71
+ }
72
+ function hasUnderlineFormatting(el) {
73
+ const style = el.getAttribute("style") ?? "";
74
+ return el.classList.contains("Underlined") || /text-decoration\s*:\s*underline/.test(style);
75
+ }
76
+ function hasStrikethroughFormatting(el) {
77
+ const style = el.getAttribute("style") ?? "";
78
+ return el.classList.contains("Strikethrough") || /text-decoration\s*:\s*line-through/.test(style);
79
+ }
80
+ function hasFormatting(el) {
81
+ return hasStrongFormatting(el) || hasEmphasisFormatting(el) || hasUnderlineFormatting(el) || hasStrikethroughFormatting(el);
82
+ }
83
+ function preprocessWordOnline(html, doc) {
84
+ if (!isWordOnlineHtml(html))
85
+ return doc;
86
+ const paragraphs = Array.from(
87
+ doc.querySelectorAll('p.Paragraph[role="heading"]')
88
+ );
89
+ for (const paragraph of paragraphs) {
90
+ const ariaLevel = paragraph.getAttribute("aria-level");
91
+ if (ariaLevel) {
92
+ const wrapper = doc.createElement("word-online-block");
93
+ wrapper.setAttribute("data-parastyle", `heading ${ariaLevel}`);
94
+ const parent = paragraph.parentNode;
95
+ if (parent) {
96
+ for (parent.insertBefore(wrapper, paragraph); paragraph.firstChild; )
97
+ wrapper.appendChild(paragraph.firstChild);
98
+ parent.removeChild(paragraph);
99
+ }
100
+ }
101
+ }
102
+ let child = doc.body.firstChild;
103
+ for (; child; ) {
104
+ const next = child.nextSibling;
105
+ if (!isElement(child) || !tagName(child)?.includes("span")) {
106
+ child = next;
107
+ continue;
108
+ }
109
+ const paraStyle = getParaStyle(child);
110
+ if (!paraStyle) {
111
+ child = next;
112
+ continue;
113
+ }
114
+ const group = [child];
115
+ let sibling = next;
116
+ for (; sibling && !(!isElement(sibling) || getParaStyle(sibling) !== paraStyle); )
117
+ group.push(sibling), sibling = sibling.nextSibling;
118
+ const wrapper = doc.createElement("word-online-block");
119
+ wrapper.setAttribute("data-parastyle", paraStyle), doc.body.insertBefore(wrapper, child);
120
+ for (const span of group)
121
+ wrapper.appendChild(span);
122
+ child = sibling;
123
+ }
124
+ const textRunSpans = Array.from(doc.body.querySelectorAll("span")).filter(
125
+ isTextRunSpan
126
+ );
127
+ for (const textRunSpan of textRunSpans) {
128
+ const normalTextRuns = Array.from(textRunSpan.childNodes).filter(
129
+ isNormalTextRun
130
+ );
131
+ for (const normalTextRun of normalTextRuns) {
132
+ let foundNestedSpan = !0;
133
+ for (; foundNestedSpan; ) {
134
+ const children = Array.from(normalTextRun.childNodes), nestedSpanIndex = children.findIndex(
135
+ (node) => isElement(node) && tagName(node) === "span" && node.textContent.trim() === ""
136
+ );
137
+ if (nestedSpanIndex === -1) {
138
+ foundNestedSpan = !1;
139
+ break;
140
+ }
141
+ const nestedSpan = children.at(nestedSpanIndex);
142
+ if (!nestedSpan) {
143
+ foundNestedSpan = !1;
144
+ break;
145
+ }
146
+ const spaceText = nestedSpan.textContent?.replace(/\u00a0/g, " ") ?? "", isSpaceAtBeginning = !children.slice(0, nestedSpanIndex).some((n) => n.nodeType === 3);
147
+ if (normalTextRun.removeChild(nestedSpan), isSpaceAtBeginning) {
148
+ const firstTextNode = Array.from(normalTextRun.childNodes).find(
149
+ (n) => n.nodeType === 3
150
+ );
151
+ if (firstTextNode)
152
+ firstTextNode.textContent = spaceText + (firstTextNode.textContent || "");
153
+ else {
154
+ const spaceNode = doc.createTextNode(spaceText);
155
+ normalTextRun.insertBefore(spaceNode, normalTextRun.firstChild);
156
+ }
157
+ } else {
158
+ const nextSibling = textRunSpan.nextSibling, currentHasFormatting = hasFormatting(textRunSpan);
159
+ if (nextSibling && isElement(nextSibling) && isTextRunSpan(nextSibling)) {
160
+ const nextHasFormatting = hasFormatting(nextSibling);
161
+ if (currentHasFormatting && !nextHasFormatting) {
162
+ const nextNormalTextRun = Array.from(nextSibling.childNodes).find(
163
+ isNormalTextRun
164
+ );
165
+ if (nextNormalTextRun && isElement(nextNormalTextRun)) {
166
+ const firstChild = nextNormalTextRun.firstChild;
167
+ if (firstChild && firstChild.nodeType === 3)
168
+ firstChild.textContent = spaceText + (firstChild.textContent ?? "");
169
+ else {
170
+ const spaceNode = doc.createTextNode(spaceText);
171
+ nextNormalTextRun.insertBefore(
172
+ spaceNode,
173
+ nextNormalTextRun.firstChild
174
+ );
175
+ }
176
+ }
177
+ } else {
178
+ const lastTextNode = Array.from(normalTextRun.childNodes).find(
179
+ (n) => n.nodeType === 3
180
+ );
181
+ if (lastTextNode)
182
+ lastTextNode.textContent = (lastTextNode.textContent ?? "") + spaceText;
183
+ else {
184
+ const spaceNode = doc.createTextNode(spaceText);
185
+ normalTextRun.appendChild(spaceNode);
186
+ }
187
+ }
188
+ } else {
189
+ const lastTextNode = Array.from(normalTextRun.childNodes).find(
190
+ (n) => n.nodeType === 3
191
+ );
192
+ if (lastTextNode)
193
+ lastTextNode.textContent = (lastTextNode.textContent ?? "") + spaceText;
194
+ else {
195
+ const spaceNode = doc.createTextNode(spaceText);
196
+ normalTextRun.appendChild(spaceNode);
197
+ }
198
+ }
199
+ }
200
+ }
201
+ }
202
+ }
203
+ return doc;
204
+ }
205
+ function getParaStyle(element) {
206
+ const directStyle = element.getAttribute("data-ccp-parastyle");
207
+ if (directStyle)
208
+ return directStyle;
209
+ if (tagName(element) === "span" && element.classList.contains("TextRun")) {
210
+ const normalTextRuns = Array.from(
211
+ element.querySelectorAll(".NormalTextRun")
212
+ );
213
+ if (normalTextRuns.length > 0) {
214
+ const firstStyle = normalTextRuns[0].getAttribute("data-ccp-parastyle");
215
+ if (firstStyle && normalTextRuns.every(
216
+ (normalTextRun) => normalTextRun.getAttribute("data-ccp-parastyle") === firstStyle
217
+ ))
218
+ return firstStyle;
219
+ }
220
+ }
221
+ }
31
222
  const _XPathResult = {
32
223
  BOOLEAN_TYPE: 3,
33
224
  ORDERED_NODE_ITERATOR_TYPE: 5,
34
225
  UNORDERED_NODE_SNAPSHOT_TYPE: 6
35
226
  };
36
- var preprocessGDocs = (_html, doc, options) => {
37
- const whitespaceOnPasteMode = options?.unstable_whitespaceOnPasteMode || "preserve";
227
+ function preprocessGDocs(_html, doc) {
38
228
  let gDocsRootOrSiblingNode = doc.evaluate(
39
229
  '//*[@id and contains(@id, "docs-internal-guid")]',
40
230
  doc,
@@ -44,14 +234,7 @@ var preprocessGDocs = (_html, doc, options) => {
44
234
  ).iterateNext();
45
235
  if (gDocsRootOrSiblingNode) {
46
236
  const isWrappedRootTag = tagName(gDocsRootOrSiblingNode) === "b";
47
- switch (isWrappedRootTag || (gDocsRootOrSiblingNode = doc.body), whitespaceOnPasteMode) {
48
- case "normalize":
49
- normalizeWhitespace(gDocsRootOrSiblingNode);
50
- break;
51
- case "remove":
52
- removeAllWhitespace(gDocsRootOrSiblingNode);
53
- break;
54
- }
237
+ isWrappedRootTag || (gDocsRootOrSiblingNode = doc.body);
55
238
  const childNodes = doc.evaluate(
56
239
  "//*",
57
240
  doc,
@@ -68,7 +251,7 @@ var preprocessGDocs = (_html, doc, options) => {
68
251
  ), doc;
69
252
  }
70
253
  return doc;
71
- };
254
+ }
72
255
  const unwantedWordDocumentPaths = [
73
256
  "/html/text()",
74
257
  "/html/head/text()",
@@ -82,7 +265,7 @@ const unwantedWordDocumentPaths = [
82
265
  "//meta",
83
266
  "//link"
84
267
  ];
85
- var preprocessHTML = (_html, doc) => {
268
+ function preprocessHTML(_html, doc) {
86
269
  const bodyTextNodes = doc.evaluate(
87
270
  "/html/body/text()",
88
271
  doc,
@@ -110,7 +293,8 @@ var preprocessHTML = (_html, doc) => {
110
293
  unwanted && unwanted.parentNode?.removeChild(unwanted);
111
294
  }
112
295
  return doc;
113
- }, preprocessNotion = (html, doc) => {
296
+ }
297
+ function preprocessNotion(html, doc) {
114
298
  const NOTION_REGEX = /<!-- notionvc:.*?-->/g;
115
299
  if (html.match(NOTION_REGEX)) {
116
300
  const childNodes = doc.evaluate(
@@ -125,18 +309,30 @@ var preprocessHTML = (_html, doc) => {
125
309
  return doc;
126
310
  }
127
311
  return doc;
128
- }, preprocessWhitespace = (_2, doc) => {
312
+ }
313
+ const BLOCK_CONTAINER_ELEMENTS = [
314
+ "body",
315
+ "table",
316
+ "tbody",
317
+ "thead",
318
+ "tfoot",
319
+ "tr",
320
+ "ul",
321
+ "ol"
322
+ ];
323
+ function preprocessWhitespace(_2, doc) {
129
324
  function processNode(node) {
130
325
  if (node.nodeType === _XPathResult.BOOLEAN_TYPE && !PRESERVE_WHITESPACE_TAGS.includes(
131
326
  node.parentElement?.tagName.toLowerCase() || ""
132
- ))
133
- node.textContent = node.textContent?.replace(/\s\s+/g, " ").replace(/[\r\n]+/g, " ") || "";
134
- else
135
- for (let i = 0; i < node.childNodes.length; i++)
327
+ )) {
328
+ const normalized = node.textContent?.replace(/\s\s+/g, " ").replace(/[\r\n]+/g, " ") || "", parentTag = node.parentElement?.tagName.toLowerCase();
329
+ parentTag && BLOCK_CONTAINER_ELEMENTS.includes(parentTag) && normalized.trim() === "" ? node.parentNode?.removeChild(node) : node.textContent = normalized;
330
+ } else
331
+ for (let i = node.childNodes.length - 1; i >= 0; i--)
136
332
  processNode(node.childNodes[i]);
137
333
  }
138
334
  return processNode(doc.body), doc;
139
- };
335
+ }
140
336
  const WORD_HTML_REGEX = /(class="?Mso|style=(?:"|')[^"]*?\bmso-|w:WordDocument|<o:\w+>|<\/font>)/, unwantedPaths = [
141
337
  "//o:p",
142
338
  "//span[@style='mso-list:Ignore']",
@@ -160,7 +356,7 @@ const WORD_HTML_REGEX = /(class="?Mso|style=(?:"|')[^"]*?\bmso-|w:WordDocument|<
160
356
  function isWordHtml(html) {
161
357
  return WORD_HTML_REGEX.test(html);
162
358
  }
163
- var preprocessWord = (html, doc) => {
359
+ function preprocessWord(html, doc) {
164
360
  if (!isWordHtml(html))
165
361
  return doc;
166
362
  const unwantedNodes = doc.evaluate(
@@ -192,13 +388,201 @@ var preprocessWord = (html, doc) => {
192
388
  }), child.appendChild(text), mappedElm?.parentNode?.replaceChild(parentElement, mappedElm);
193
389
  }
194
390
  return doc;
195
- }, preprocessors = [
391
+ }
392
+ const preprocessors = [
196
393
  preprocessWhitespace,
197
394
  preprocessNotion,
198
395
  preprocessWord,
396
+ preprocessWordOnline,
199
397
  preprocessGDocs,
200
398
  preprocessHTML
201
399
  ];
400
+ function mapParaStyleToBlockStyle(schema, paraStyle) {
401
+ const blockStyle = {
402
+ "heading 1": "h1",
403
+ "heading 2": "h2",
404
+ "heading 3": "h3",
405
+ "heading 4": "h4",
406
+ "heading 5": "h5",
407
+ "heading 6": "h6",
408
+ Quote: "blockquote"
409
+ }[paraStyle] ?? "normal";
410
+ return schema.styles.find((style) => style.name === blockStyle)?.name;
411
+ }
412
+ function createWordOnlineRules(schema, options) {
413
+ return [
414
+ // Image rule - handles bare Word Online <img> tags with WACImage class
415
+ {
416
+ deserialize(el) {
417
+ if (!isElement(el) || tagName(el) !== "img")
418
+ return;
419
+ const classNameRaw = el.className;
420
+ let className = "";
421
+ if (typeof classNameRaw == "string" ? className = classNameRaw : classNameRaw && typeof classNameRaw == "object" && (className = classNameRaw.baseVal || ""), !className.includes("WACImage"))
422
+ return;
423
+ const src = el.getAttribute("src") ?? void 0, alt = el.getAttribute("alt") ?? void 0, props = Object.fromEntries(
424
+ Array.from(el.attributes).map((attr) => [attr.name, attr.value])
425
+ ), image = options.matchers?.image?.({
426
+ context: {
427
+ schema,
428
+ keyGenerator: options.keyGenerator ?? keyGenerator
429
+ },
430
+ props: {
431
+ ...props,
432
+ ...src ? { src } : {},
433
+ ...alt ? { alt } : {}
434
+ }
435
+ });
436
+ if (image)
437
+ return {
438
+ _type: "__block",
439
+ block: image
440
+ };
441
+ }
442
+ },
443
+ // Image rule - handles Word Online images wrapped in WACImageContainer
444
+ {
445
+ deserialize(el) {
446
+ if (!isElement(el))
447
+ return;
448
+ const classNameRaw = el.className;
449
+ let className = "";
450
+ if (typeof classNameRaw == "string" ? className = classNameRaw : classNameRaw && typeof classNameRaw == "object" && (className = classNameRaw.baseVal || ""), !className.includes("WACImageContainer"))
451
+ return;
452
+ const img = el.querySelector("img");
453
+ if (!img)
454
+ return;
455
+ const src = img.getAttribute("src") ?? void 0, alt = img.getAttribute("alt") ?? void 0, props = Object.fromEntries(
456
+ Array.from(img.attributes).map((attr) => [attr.name, attr.value])
457
+ ), isInsideListItem = el.closest("li") !== null;
458
+ if (el.closest("p") === null || isInsideListItem) {
459
+ const inlineImage = options.matchers?.inlineImage?.({
460
+ context: {
461
+ schema,
462
+ keyGenerator: options.keyGenerator ?? keyGenerator
463
+ },
464
+ props: {
465
+ ...props,
466
+ ...src ? { src } : {},
467
+ ...alt ? { alt } : {}
468
+ }
469
+ });
470
+ if (inlineImage)
471
+ return inlineImage;
472
+ }
473
+ const image = options.matchers?.image?.({
474
+ context: {
475
+ schema,
476
+ keyGenerator: options.keyGenerator ?? keyGenerator
477
+ },
478
+ props: {
479
+ ...props,
480
+ ...src ? { src } : {},
481
+ ...alt ? { alt } : {}
482
+ }
483
+ });
484
+ if (image)
485
+ return {
486
+ _type: "__block",
487
+ block: image
488
+ };
489
+ }
490
+ },
491
+ // List item rule - handles <li> elements with aria-level
492
+ {
493
+ deserialize(el, next) {
494
+ if (!isElement(el) || tagName(el) !== "li")
495
+ return;
496
+ const ariaLevel = el.getAttribute("data-aria-level");
497
+ if (!ariaLevel)
498
+ return;
499
+ const listItem = tagName(el.parentNode) === "ol" ? "number" : "bullet";
500
+ let childNodesToProcess = el.childNodes, blockStyle = "normal";
501
+ if (el.childNodes.length === 1 && el.firstChild && isElement(el.firstChild)) {
502
+ const childTag = tagName(el.firstChild);
503
+ if (childTag && (HTML_BLOCK_TAGS[childTag] || HTML_HEADER_TAGS[childTag] || childTag === "word-online-block")) {
504
+ if (childTag === "word-online-block") {
505
+ const paraStyle = el.firstChild.getAttribute("data-parastyle"), foundBlockStyle = paraStyle ? mapParaStyleToBlockStyle(schema, paraStyle) : void 0;
506
+ foundBlockStyle && (blockStyle = foundBlockStyle);
507
+ }
508
+ childNodesToProcess = el.firstChild.childNodes;
509
+ }
510
+ }
511
+ const children = next(childNodesToProcess);
512
+ let childArray = Array.isArray(children) ? children : [children].filter(Boolean);
513
+ for (; childArray.length > 0; ) {
514
+ const lastChild = childArray[childArray.length - 1];
515
+ if (lastChild && typeof lastChild == "object" && "text" in lastChild) {
516
+ const text = lastChild.text.trimEnd();
517
+ if (text === "")
518
+ childArray = childArray.slice(0, -1);
519
+ else if (text !== lastChild.text) {
520
+ lastChild.text = text;
521
+ break;
522
+ } else
523
+ break;
524
+ } else
525
+ break;
526
+ }
527
+ return {
528
+ _type: schema.block.name,
529
+ children: childArray,
530
+ markDefs: [],
531
+ style: blockStyle,
532
+ listItem,
533
+ level: parseInt(ariaLevel, 10)
534
+ };
535
+ }
536
+ },
537
+ // Block style rule - handles paragraph styles like Quote
538
+ // The preprocessor wraps grouped NormalTextRun spans in a word-online-block element
539
+ {
540
+ deserialize(el, next) {
541
+ if (!isElement(el))
542
+ return;
543
+ const paraStyle = el.getAttribute("data-parastyle"), blockStyle = paraStyle ? mapParaStyleToBlockStyle(schema, paraStyle) : void 0;
544
+ if (!blockStyle)
545
+ return;
546
+ const children = next(el.childNodes);
547
+ return {
548
+ _type: schema.block.name,
549
+ style: blockStyle,
550
+ markDefs: [],
551
+ children: Array.isArray(children) ? children : children ? [children] : []
552
+ };
553
+ }
554
+ },
555
+ // TextRun rule
556
+ {
557
+ deserialize(el) {
558
+ if (isWordOnlineTextRun(el)) {
559
+ if (!isElement(el) || !el.textContent)
560
+ return;
561
+ const text = Array.from(el.childNodes).filter(
562
+ (node) => isNormalTextRun(node) || isFindHit(node)
563
+ ).map((span2) => isElement(span2) ? span2.textContent ?? "" : "").join("");
564
+ if (!text)
565
+ return;
566
+ const span = {
567
+ ...DEFAULT_SPAN,
568
+ marks: [],
569
+ text
570
+ };
571
+ if (hasStrongFormatting(el) && span.marks.push("strong"), hasEmphasisFormatting(el) && !isInHeading(el) && !isInBlockquote(el) && span.marks.push("em"), hasUnderlineFormatting(el))
572
+ if (isElement(el) && el.parentElement && tagName(el.parentElement) === "a") {
573
+ const linkElement = el.parentElement;
574
+ if (linkElement) {
575
+ const prevSibling = linkElement.previousSibling, nextSibling = linkElement.nextSibling, hasPrevUnderline = prevSibling && isElement(prevSibling) && hasUnderlineFormatting(prevSibling), hasNextUnderline = nextSibling && isElement(nextSibling) && hasUnderlineFormatting(nextSibling);
576
+ (hasPrevUnderline || hasNextUnderline) && span.marks.push("underline");
577
+ }
578
+ } else
579
+ span.marks.push("underline");
580
+ return hasStrikethroughFormatting(el) && span.marks.push("strike-through"), span;
581
+ }
582
+ }
583
+ }
584
+ ];
585
+ }
202
586
  const LIST_CONTAINER_TAGS = Object.keys(HTML_LIST_CONTAINER_TAGS);
203
587
  function isEmphasis$1(el) {
204
588
  const style = isElement(el) && el.getAttribute("style");
@@ -572,7 +956,7 @@ function createNotionRules() {
572
956
  function getListItemStyle(el) {
573
957
  const style = isElement(el) && el.getAttribute("style");
574
958
  if (style && style.match(/lfo\d+/))
575
- return style.match("lfo1") ? "bullet" : "number";
959
+ return style.match("lfo1") ? "number" : "bullet";
576
960
  }
577
961
  function getListItemLevel(el) {
578
962
  const style = isElement(el) && el.getAttribute("style");
@@ -585,20 +969,33 @@ function getListItemLevel(el) {
585
969
  return (level ? Number.parseInt(level, 10) : 1) || 1;
586
970
  }
587
971
  function isWordListElement(el) {
588
- return isElement(el) && el.className ? el.className === "MsoListParagraphCxSpFirst" || el.className === "MsoListParagraphCxSpMiddle" || el.className === "MsoListParagraphCxSpLast" : !1;
972
+ if (!isElement(el))
973
+ return !1;
974
+ if (el.className && (el.className === "MsoListParagraphCxSpFirst" || el.className === "MsoListParagraphCxSpMiddle" || el.className === "MsoListParagraphCxSpLast"))
975
+ return !0;
976
+ const style = el.getAttribute("style");
977
+ return !!(style && /mso-list:\s*l\d+\s+level\d+\s+lfo\d+/.test(style));
978
+ }
979
+ function getHeadingStyle(el) {
980
+ const tag = tagName(el);
981
+ if (tag && HTML_HEADER_TAGS[tag])
982
+ return HTML_HEADER_TAGS[tag]?.style;
589
983
  }
590
984
  function createWordRules() {
591
985
  return [
592
986
  {
593
987
  deserialize(el, next) {
594
- if (tagName(el) === "p" && isWordListElement(el))
988
+ const tag = tagName(el);
989
+ if ((tag === "p" || HTML_HEADER_TAGS[tag || ""]) && isWordListElement(el)) {
990
+ const headingStyle = getHeadingStyle(el);
595
991
  return {
596
992
  ...DEFAULT_BLOCK,
597
993
  listItem: getListItemStyle(el),
598
994
  level: getListItemLevel(el),
599
- style: BLOCK_DEFAULT_STYLE,
995
+ style: headingStyle || BLOCK_DEFAULT_STYLE,
600
996
  children: next(el.childNodes)
601
997
  };
998
+ }
602
999
  }
603
1000
  }
604
1001
  ];
@@ -606,16 +1003,71 @@ function createWordRules() {
606
1003
  function createRules(schema, options) {
607
1004
  return [
608
1005
  ...createWordRules(),
1006
+ ...createWordOnlineRules(schema, options),
609
1007
  ...createNotionRules(),
610
1008
  ...createGDocsRules(schema),
611
1009
  ...createHTMLRules(schema, options)
612
1010
  ];
613
1011
  }
1012
+ function trimWhitespace(context, mode, blocks2) {
1013
+ const trimmedBlocks = [];
1014
+ let consecutiveEmptyCount = 0;
1015
+ for (const block of blocks2) {
1016
+ const trimmedBlock = isTextBlock(context, block) ? trimTextBlockWhitespace(block) : block;
1017
+ if (mode === "preserve") {
1018
+ trimmedBlocks.push(trimmedBlock);
1019
+ continue;
1020
+ }
1021
+ if (mode === "remove") {
1022
+ if (isEmptyTextBlock(context, trimmedBlock))
1023
+ continue;
1024
+ trimmedBlocks.push(trimmedBlock);
1025
+ continue;
1026
+ }
1027
+ if (mode === "normalize") {
1028
+ if (isEmptyTextBlock(context, trimmedBlock)) {
1029
+ consecutiveEmptyCount++, consecutiveEmptyCount === 1 && trimmedBlocks.push(trimmedBlock);
1030
+ continue;
1031
+ }
1032
+ trimmedBlocks.push(trimmedBlock), consecutiveEmptyCount = 0;
1033
+ }
1034
+ }
1035
+ return trimmedBlocks;
1036
+ }
1037
+ function isEmptyTextBlock(context, block) {
1038
+ return !(!isTextBlock(context, block) || block.children.some(
1039
+ (child) => !isSpan(context, child) || child.text.trim() !== ""
1040
+ ));
1041
+ }
1042
+ function trimTextBlockWhitespace(block) {
1043
+ let index = 0;
1044
+ for (const child of block.children) {
1045
+ if (!isMinimalSpan(child)) {
1046
+ index++;
1047
+ continue;
1048
+ }
1049
+ const nextChild = nextSpan(block, index), prevChild = prevSpan(block, index);
1050
+ index === 0 && (child.text = child.text.replace(/^[^\S\n]+/g, "")), index === block.children.length - 1 && (child.text = child.text.replace(/[^\S\n]+$/g, "")), /\s/.test(child.text.slice(Math.max(0, child.text.length - 1))) && nextChild && isMinimalSpan(nextChild) && /\s/.test(nextChild.text.slice(0, 1)) && (child.text = child.text.replace(/[^\S\n]+$/g, "")), /\s/.test(child.text.slice(0, 1)) && prevChild && isMinimalSpan(prevChild) && /\s/.test(prevChild.text.slice(Math.max(0, prevChild.text.length - 1))) && (child.text = child.text.replace(/^[^\S\n]+/g, "")), child.text || block.children.splice(index, 1), prevChild && isEqual(prevChild.marks, child.marks) && isWhiteSpaceChar(child.text) ? (prevChild.text += " ", block.children.splice(index, 1)) : nextChild && isEqual(nextChild.marks, child.marks) && isWhiteSpaceChar(child.text) && (nextChild.text = ` ${nextChild.text}`, block.children.splice(index, 1)), index++;
1051
+ }
1052
+ return block;
1053
+ }
1054
+ function nextSpan(block, index) {
1055
+ const next = block.children[index + 1];
1056
+ return next && next._type === "span" ? next : null;
1057
+ }
1058
+ function prevSpan(block, index) {
1059
+ const prev = block.children[index - 1];
1060
+ return prev && prev._type === "span" ? prev : null;
1061
+ }
1062
+ function isWhiteSpaceChar(text) {
1063
+ return ["\xA0", " "].includes(text);
1064
+ }
614
1065
  class HtmlDeserializer {
615
1066
  keyGenerator;
616
1067
  schema;
617
1068
  rules;
618
1069
  parseHtml;
1070
+ whitespaceMode;
619
1071
  _markDefs = [];
620
1072
  /**
621
1073
  * Create a new serializer respecting a Sanity block content type's schema
@@ -628,9 +1080,14 @@ class HtmlDeserializer {
628
1080
  keyGenerator: options.keyGenerator,
629
1081
  matchers: options.matchers
630
1082
  });
631
- this.schema = schema, this.keyGenerator = options.keyGenerator ?? keyGenerator, this.rules = [...rules, ...standardRules];
1083
+ this.schema = schema, this.keyGenerator = options.keyGenerator ?? keyGenerator, this.rules = [...rules, ...standardRules], this.whitespaceMode = unstable_whitespaceOnPasteMode;
632
1084
  const parseHtml = options.parseHtml || defaultParseHtml();
633
- this.parseHtml = (html) => preprocess(html, parseHtml, { unstable_whitespaceOnPasteMode }).body;
1085
+ this.parseHtml = (html) => {
1086
+ const cleanHTML = O(html), doc = parseHtml(cleanHTML);
1087
+ for (const processor of preprocessors)
1088
+ processor(cleanHTML, doc);
1089
+ return doc.body;
1090
+ };
634
1091
  }
635
1092
  /**
636
1093
  * Deserialize HTML.
@@ -641,7 +1098,8 @@ class HtmlDeserializer {
641
1098
  deserialize = (html) => {
642
1099
  this._markDefs = [];
643
1100
  const { parseHtml } = this, fragment = parseHtml(html), children = Array.from(fragment.childNodes), blocks2 = trimWhitespace(
644
- this.schema,
1101
+ { schema: this.schema },
1102
+ this.whitespaceMode,
645
1103
  flattenNestedBlocks(
646
1104
  { schema: this.schema },
647
1105
  ensureRootIsBlocks(
@@ -769,15 +1227,6 @@ class HtmlDeserializer {
769
1227
  }, []);
770
1228
  };
771
1229
  }
772
- function preprocess(html, parseHtml, options) {
773
- const cleanHTML = O(html), doc = parseHtml(normalizeHtmlBeforePreprocess(cleanHTML));
774
- return preprocessors.forEach((processor) => {
775
- processor(cleanHTML, doc, options);
776
- }), doc;
777
- }
778
- function normalizeHtmlBeforePreprocess(html) {
779
- return html.trim();
780
- }
781
1230
  function normalizeBlock(node, options = {}) {
782
1231
  const schema = {
783
1232
  block: {