@portabletext/block-tools 5.0.5 → 5.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/lib/index.js CHANGED
@@ -1,17 +1,15 @@
1
+ import { htmlToPortableText } from "@portabletext/html";
1
2
  import { sanitySchemaToPortableTextSchema } from "@portabletext/sanity-bridge";
2
- import { isTextBlock, isSpan } from "@portabletext/schema";
3
- import { isElement, tagName, PRESERVE_WHITESPACE_TAGS, HTML_BLOCK_TAGS, HTML_HEADER_TAGS, DEFAULT_SPAN, DEFAULT_BLOCK, BLOCK_DEFAULT_STYLE, HTML_LIST_CONTAINER_TAGS, HTML_SPAN_TAGS, HTML_LIST_ITEM_TAGS, HTML_DECORATOR_TAGS, isMinimalSpan, isEqualMarks, defaultParseHtml, flattenNestedBlocks, ensureRootIsBlocks, resolveJsType, isPlaceholderDecorator, isPlaceholderAnnotation, isMinimalBlock, isNodeList } from "./_chunks-es/helpers.js";
4
- var l = { 0: 8203, 1: 8204, 2: 8205, 3: 8290, 4: 8291, 5: 8288, 6: 65279, 7: 8289, 8: 119155, 9: 119156, a: 119157, b: 119158, c: 119159, d: 119160, e: 119161, f: 119162 }, d = { 0: 8203, 1: 8204, 2: 8205, 3: 65279 }, a = { 0: String.fromCodePoint(d[0]), 1: String.fromCodePoint(d[1]), 2: String.fromCodePoint(d[2]), 3: String.fromCodePoint(d[3]) };
5
- new Array(4).fill(String.fromCodePoint(d[0])).join("");
6
- Object.fromEntries(Object.entries(a).map((t) => [t[1], +t[0]]));
7
- Object.fromEntries(Object.entries(l).map((t) => t.reverse()));
8
- var _ = `${Object.values(l).map((t) => `\\u{${t.toString(16)}}`).join("")}`, u = new RegExp(`[${_}]{4,}`, "gu");
9
- function D(t) {
10
- var e;
11
- return { cleaned: t.replace(u, ""), encoded: ((e = t.match(u)) == null ? void 0 : e[0]) || "" };
12
- }
13
- function M(t) {
14
- return t && JSON.parse(D(JSON.stringify(t)).cleaned);
3
+ import { isSpan } from "@portabletext/schema";
4
+ function isEqualMarks(a, b) {
5
+ if (!a || !b)
6
+ return a === b;
7
+ if (a.length !== b.length)
8
+ return !1;
9
+ for (let index = 0; index < a.length; index++)
10
+ if (a[index] !== b[index])
11
+ return !1;
12
+ return !0;
15
13
  }
16
14
  function keyGenerator() {
17
15
  return randomKey(12);
@@ -26,1238 +24,6 @@ for (let i = 0; i < 256; ++i)
26
24
  function randomKey(length) {
27
25
  return whatwgRNG(length).reduce((str, n) => str + byteToHex[n], "").slice(0, length);
28
26
  }
29
- function isWordOnlineHtml(html) {
30
- return /class="(?:TextRun|NormalTextRun)[^"]*SCXW\d+[^"]*BCX\d+/.test(html) || /class="EOP[^"]*SCXW\d+/.test(html);
31
- }
32
- function isWordOnlineTextRun(el) {
33
- return !isElement(el) || tagName(el) !== "span" ? !1 : el.classList.contains("TextRun") && !el.classList.contains("EOP");
34
- }
35
- function isNormalTextRun(el) {
36
- return !isElement(el) || tagName(el) !== "span" ? !1 : el.classList.contains("NormalTextRun");
37
- }
38
- function isTextRunSpan(el) {
39
- return !isElement(el) || tagName(el) !== "span" ? !1 : el.classList.contains("TextRun") && !el.classList.contains("NormalTextRun") && !el.classList.contains("EOP");
40
- }
41
- function isFindHit(el) {
42
- return !isElement(el) || tagName(el) !== "span" ? !1 : el.classList.contains("FindHit");
43
- }
44
- function isInHeading(el) {
45
- let current = el;
46
- for (; current; ) {
47
- if (isElement(current) && tagName(current) === "word-online-block" && /^heading \d$/.test(current.getAttribute("data-parastyle") ?? ""))
48
- return !0;
49
- current = current.parentNode;
50
- }
51
- return !1;
52
- }
53
- function isInBlockquote(el) {
54
- let current = el;
55
- for (; current; ) {
56
- if (isElement(current) && tagName(current) === "word-online-block" && current.getAttribute("data-parastyle") === "Quote")
57
- return !0;
58
- current = current.parentNode;
59
- }
60
- return !1;
61
- }
62
- function hasStrongFormatting(el) {
63
- const style = el.getAttribute("style") ?? "";
64
- return el.classList.contains("MacChromeBold") || /font-weight\s*:\s*bold/.test(style);
65
- }
66
- function hasEmphasisFormatting(el) {
67
- const style = el.getAttribute("style") ?? "";
68
- return /font-style\s*:\s*italic/.test(style);
69
- }
70
- function hasUnderlineFormatting(el) {
71
- const style = el.getAttribute("style") ?? "";
72
- return el.classList.contains("Underlined") || /text-decoration\s*:\s*underline/.test(style);
73
- }
74
- function hasStrikethroughFormatting(el) {
75
- const style = el.getAttribute("style") ?? "";
76
- return el.classList.contains("Strikethrough") || /text-decoration\s*:\s*line-through/.test(style);
77
- }
78
- function hasFormatting(el) {
79
- return hasStrongFormatting(el) || hasEmphasisFormatting(el) || hasUnderlineFormatting(el) || hasStrikethroughFormatting(el);
80
- }
81
- function preprocessWordOnline(html, doc) {
82
- if (!isWordOnlineHtml(html))
83
- return doc;
84
- const paragraphs = Array.from(
85
- doc.querySelectorAll('p.Paragraph[role="heading"]')
86
- );
87
- for (const paragraph of paragraphs) {
88
- const ariaLevel = paragraph.getAttribute("aria-level");
89
- if (ariaLevel) {
90
- const wrapper = doc.createElement("word-online-block");
91
- wrapper.setAttribute("data-parastyle", `heading ${ariaLevel}`);
92
- const parent = paragraph.parentNode;
93
- if (parent) {
94
- for (parent.insertBefore(wrapper, paragraph); paragraph.firstChild; )
95
- wrapper.appendChild(paragraph.firstChild);
96
- parent.removeChild(paragraph);
97
- }
98
- }
99
- }
100
- let child = doc.body.firstChild;
101
- for (; child; ) {
102
- const next = child.nextSibling;
103
- if (!isElement(child) || !tagName(child)?.includes("span")) {
104
- child = next;
105
- continue;
106
- }
107
- const paraStyle = getParaStyle(child);
108
- if (!paraStyle) {
109
- child = next;
110
- continue;
111
- }
112
- const group = [child];
113
- let sibling = next;
114
- for (; sibling && !(!isElement(sibling) || getParaStyle(sibling) !== paraStyle); )
115
- group.push(sibling), sibling = sibling.nextSibling;
116
- const wrapper = doc.createElement("word-online-block");
117
- wrapper.setAttribute("data-parastyle", paraStyle), doc.body.insertBefore(wrapper, child);
118
- for (const span of group)
119
- wrapper.appendChild(span);
120
- child = sibling;
121
- }
122
- const textRunSpans = Array.from(doc.body.querySelectorAll("span")).filter(
123
- isTextRunSpan
124
- );
125
- for (const textRunSpan of textRunSpans) {
126
- const normalTextRuns = Array.from(textRunSpan.childNodes).filter(
127
- isNormalTextRun
128
- );
129
- for (const normalTextRun of normalTextRuns) {
130
- let foundNestedSpan = !0;
131
- for (; foundNestedSpan; ) {
132
- const children = Array.from(normalTextRun.childNodes), nestedSpanIndex = children.findIndex(
133
- (node) => isElement(node) && tagName(node) === "span" && node.textContent.trim() === ""
134
- );
135
- if (nestedSpanIndex === -1) {
136
- foundNestedSpan = !1;
137
- break;
138
- }
139
- const nestedSpan = children.at(nestedSpanIndex);
140
- if (!nestedSpan) {
141
- foundNestedSpan = !1;
142
- break;
143
- }
144
- const spaceText = nestedSpan.textContent?.replace(/\u00a0/g, " ") ?? "", isSpaceAtBeginning = !children.slice(0, nestedSpanIndex).some((n) => n.nodeType === 3);
145
- if (normalTextRun.removeChild(nestedSpan), isSpaceAtBeginning) {
146
- const firstTextNode = Array.from(normalTextRun.childNodes).find(
147
- (n) => n.nodeType === 3
148
- );
149
- if (firstTextNode)
150
- firstTextNode.textContent = spaceText + (firstTextNode.textContent || "");
151
- else {
152
- const spaceNode = doc.createTextNode(spaceText);
153
- normalTextRun.insertBefore(spaceNode, normalTextRun.firstChild);
154
- }
155
- } else {
156
- const nextSibling = textRunSpan.nextSibling, currentHasFormatting = hasFormatting(textRunSpan);
157
- if (nextSibling && isElement(nextSibling) && isTextRunSpan(nextSibling)) {
158
- const nextHasFormatting = hasFormatting(nextSibling);
159
- if (currentHasFormatting && !nextHasFormatting) {
160
- const nextNormalTextRun = Array.from(nextSibling.childNodes).find(
161
- isNormalTextRun
162
- );
163
- if (nextNormalTextRun && isElement(nextNormalTextRun)) {
164
- const firstChild = nextNormalTextRun.firstChild;
165
- if (firstChild && firstChild.nodeType === 3)
166
- firstChild.textContent = spaceText + (firstChild.textContent ?? "");
167
- else {
168
- const spaceNode = doc.createTextNode(spaceText);
169
- nextNormalTextRun.insertBefore(
170
- spaceNode,
171
- nextNormalTextRun.firstChild
172
- );
173
- }
174
- }
175
- } else {
176
- const lastTextNode = Array.from(normalTextRun.childNodes).find(
177
- (n) => n.nodeType === 3
178
- );
179
- if (lastTextNode)
180
- lastTextNode.textContent = (lastTextNode.textContent ?? "") + spaceText;
181
- else {
182
- const spaceNode = doc.createTextNode(spaceText);
183
- normalTextRun.appendChild(spaceNode);
184
- }
185
- }
186
- } else {
187
- const lastTextNode = Array.from(normalTextRun.childNodes).find(
188
- (n) => n.nodeType === 3
189
- );
190
- if (lastTextNode)
191
- lastTextNode.textContent = (lastTextNode.textContent ?? "") + spaceText;
192
- else {
193
- const spaceNode = doc.createTextNode(spaceText);
194
- normalTextRun.appendChild(spaceNode);
195
- }
196
- }
197
- }
198
- }
199
- }
200
- }
201
- return doc;
202
- }
203
- function getParaStyle(element) {
204
- const directStyle = element.getAttribute("data-ccp-parastyle");
205
- if (directStyle)
206
- return directStyle;
207
- if (tagName(element) === "span" && element.classList.contains("TextRun")) {
208
- const normalTextRuns = Array.from(
209
- element.querySelectorAll(".NormalTextRun")
210
- );
211
- if (normalTextRuns.length > 0) {
212
- const firstStyle = normalTextRuns[0].getAttribute("data-ccp-parastyle");
213
- if (firstStyle && normalTextRuns.every(
214
- (normalTextRun) => normalTextRun.getAttribute("data-ccp-parastyle") === firstStyle
215
- ))
216
- return firstStyle;
217
- }
218
- }
219
- }
220
- const _XPathResult = {
221
- BOOLEAN_TYPE: 3,
222
- ORDERED_NODE_ITERATOR_TYPE: 5,
223
- UNORDERED_NODE_SNAPSHOT_TYPE: 6
224
- };
225
- function preprocessGDocs(_html, doc) {
226
- let gDocsRootOrSiblingNode = doc.evaluate(
227
- '//*[@id and contains(@id, "docs-internal-guid")]',
228
- doc,
229
- null,
230
- _XPathResult.ORDERED_NODE_ITERATOR_TYPE,
231
- null
232
- ).iterateNext();
233
- if (gDocsRootOrSiblingNode) {
234
- const isWrappedRootTag = tagName(gDocsRootOrSiblingNode) === "b";
235
- isWrappedRootTag || (gDocsRootOrSiblingNode = doc.body);
236
- const childNodes = doc.evaluate(
237
- "//*",
238
- doc,
239
- null,
240
- _XPathResult.UNORDERED_NODE_SNAPSHOT_TYPE,
241
- null
242
- );
243
- for (let i = childNodes.snapshotLength - 1; i >= 0; i--) {
244
- const elm = childNodes.snapshotItem(i);
245
- elm?.setAttribute("data-is-google-docs", "true"), (elm?.parentElement === gDocsRootOrSiblingNode || !isWrappedRootTag && elm.parentElement === doc.body) && (elm?.setAttribute("data-is-root-node", "true"), tagName(elm)), tagName(elm) === "li" && elm.firstChild && tagName(elm?.firstChild) === "img" && elm.removeChild(elm.firstChild);
246
- }
247
- return isWrappedRootTag && doc.body.firstElementChild?.replaceWith(
248
- ...Array.from(gDocsRootOrSiblingNode.childNodes)
249
- ), doc;
250
- }
251
- return doc;
252
- }
253
- const unwantedWordDocumentPaths = [
254
- "/html/text()",
255
- "/html/head/text()",
256
- "/html/body/text()",
257
- "/html/body/ul/text()",
258
- "/html/body/ol/text()",
259
- "//comment()",
260
- "//style",
261
- "//xml",
262
- "//script",
263
- "//meta",
264
- "//link"
265
- ];
266
- function preprocessHTML(_html, doc) {
267
- const bodyTextNodes = doc.evaluate(
268
- "/html/body/text()",
269
- doc,
270
- null,
271
- _XPathResult.UNORDERED_NODE_SNAPSHOT_TYPE,
272
- null
273
- );
274
- for (let i = bodyTextNodes.snapshotLength - 1; i >= 0; i--) {
275
- const node = bodyTextNodes.snapshotItem(i), text = node.textContent || "";
276
- if (text.replace(/[^\S\n]+$/g, "")) {
277
- const newNode = doc.createElement("span");
278
- newNode.appendChild(doc.createTextNode(text)), node.parentNode?.replaceChild(newNode, node);
279
- } else
280
- node.parentNode?.removeChild(node);
281
- }
282
- const unwantedNodes = doc.evaluate(
283
- unwantedWordDocumentPaths.join("|"),
284
- doc,
285
- null,
286
- _XPathResult.UNORDERED_NODE_SNAPSHOT_TYPE,
287
- null
288
- );
289
- for (let i = unwantedNodes.snapshotLength - 1; i >= 0; i--) {
290
- const unwanted = unwantedNodes.snapshotItem(i);
291
- unwanted && unwanted.parentNode?.removeChild(unwanted);
292
- }
293
- return doc;
294
- }
295
- function preprocessNotion(html, doc) {
296
- const NOTION_REGEX = /<!-- notionvc:.*?-->/g;
297
- if (html.match(NOTION_REGEX)) {
298
- const childNodes = doc.evaluate(
299
- "//*",
300
- doc,
301
- null,
302
- _XPathResult.UNORDERED_NODE_SNAPSHOT_TYPE,
303
- null
304
- );
305
- for (let i = childNodes.snapshotLength - 1; i >= 0; i--)
306
- childNodes.snapshotItem(i)?.setAttribute("data-is-notion", "true");
307
- return doc;
308
- }
309
- return doc;
310
- }
311
- const BLOCK_CONTAINER_ELEMENTS = [
312
- "body",
313
- "table",
314
- "tbody",
315
- "thead",
316
- "tfoot",
317
- "tr",
318
- "ul",
319
- "ol"
320
- ];
321
- function preprocessWhitespace(_2, doc) {
322
- function processNode(node) {
323
- if (node.nodeType === _XPathResult.BOOLEAN_TYPE && !PRESERVE_WHITESPACE_TAGS.includes(
324
- node.parentElement?.tagName.toLowerCase() || ""
325
- )) {
326
- const normalized = node.textContent?.replace(/\s\s+/g, " ").replace(/[\r\n]+/g, " ") || "", parentTag = node.parentElement?.tagName.toLowerCase();
327
- parentTag && BLOCK_CONTAINER_ELEMENTS.includes(parentTag) && normalized.trim() === "" ? node.parentNode?.removeChild(node) : node.textContent = normalized;
328
- } else
329
- for (let i = node.childNodes.length - 1; i >= 0; i--)
330
- processNode(node.childNodes[i]);
331
- }
332
- return processNode(doc.body), doc;
333
- }
334
- const WORD_HTML_REGEX = /(class="?Mso|style=(?:"|')[^"]*?\bmso-|w:WordDocument|<o:\w+>|<\/font>)/, unwantedPaths = [
335
- "//o:p",
336
- "//span[@style='mso-list:Ignore']",
337
- "//span[@style='mso-list: Ignore']"
338
- ], mappedPaths = [
339
- "//p[@class='MsoTocHeading']",
340
- "//p[@class='MsoTitle']",
341
- "//p[@class='MsoToaHeading']",
342
- "//p[@class='MsoSubtitle']",
343
- "//span[@class='MsoSubtleEmphasis']",
344
- "//span[@class='MsoIntenseEmphasis']"
345
- ], elementMap = {
346
- MsoTocHeading: ["h3"],
347
- MsoTitle: ["h1"],
348
- MsoToaHeading: ["h2"],
349
- MsoSubtitle: ["h5"],
350
- MsoSubtleEmphasis: ["span", "em"],
351
- MsoIntenseEmphasis: ["span", "em", "strong"]
352
- // Remove cruft
353
- };
354
- function isWordHtml(html) {
355
- return WORD_HTML_REGEX.test(html);
356
- }
357
- function extractListStyles(doc) {
358
- const map = {};
359
- let found = !1;
360
- const styleEls = doc.querySelectorAll("style");
361
- for (const styleEl of styleEls) {
362
- const css = styleEl.textContent || "", listRulePattern = /@list\s+(l\d+):(level\d+)\s*\{([^}]*)\}/g;
363
- for (let match = listRulePattern.exec(css); match !== null; match = listRulePattern.exec(css)) {
364
- const listId = match[1], level = match[2], ruleBody = match[3], key = `${listId}:${level}`;
365
- /mso-level-number-format\s*:\s*bullet/i.test(ruleBody) ? map[key] = "bullet" : map[key] = "number", found = !0;
366
- }
367
- }
368
- found && doc.body && doc.body.setAttribute("data-word-list-styles", JSON.stringify(map));
369
- }
370
- function preprocessWord(html, doc) {
371
- if (!isWordHtml(html))
372
- return doc;
373
- extractListStyles(doc);
374
- const unwantedNodes = doc.evaluate(
375
- unwantedPaths.join("|"),
376
- doc,
377
- (prefix) => prefix === "o" ? "urn:schemas-microsoft-com:office:office" : null,
378
- _XPathResult.UNORDERED_NODE_SNAPSHOT_TYPE,
379
- null
380
- );
381
- for (let i = unwantedNodes.snapshotLength - 1; i >= 0; i--) {
382
- const unwanted = unwantedNodes.snapshotItem(i);
383
- unwanted?.parentNode && unwanted.parentNode.removeChild(unwanted);
384
- }
385
- const mappedElements = doc.evaluate(
386
- mappedPaths.join("|"),
387
- doc,
388
- null,
389
- _XPathResult.UNORDERED_NODE_SNAPSHOT_TYPE,
390
- null
391
- );
392
- for (let i = mappedElements.snapshotLength - 1; i >= 0; i--) {
393
- const mappedElm = mappedElements.snapshotItem(i), tags = elementMap[mappedElm.className], text = doc.createTextNode(mappedElm.textContent || "");
394
- if (!tags)
395
- continue;
396
- const parentElement = doc.createElement(tags[0]);
397
- let parent = parentElement, child = parentElement;
398
- tags.slice(1).forEach((tag) => {
399
- child = doc.createElement(tag), parent.appendChild(child), parent = child;
400
- }), child.appendChild(text), mappedElm?.parentNode?.replaceChild(parentElement, mappedElm);
401
- }
402
- return doc;
403
- }
404
- const preprocessors = [
405
- preprocessWhitespace,
406
- preprocessNotion,
407
- preprocessWord,
408
- preprocessWordOnline,
409
- preprocessGDocs,
410
- preprocessHTML
411
- ];
412
- function mapParaStyleToBlockStyle(schema, paraStyle) {
413
- const blockStyle = {
414
- "heading 1": "h1",
415
- "heading 2": "h2",
416
- "heading 3": "h3",
417
- "heading 4": "h4",
418
- "heading 5": "h5",
419
- "heading 6": "h6",
420
- Quote: "blockquote"
421
- }[paraStyle] ?? "normal";
422
- return schema.styles.find((style) => style.name === blockStyle)?.name;
423
- }
424
- function createWordOnlineRules(schema, options) {
425
- return [
426
- // Image rule - handles bare Word Online <img> tags with WACImage class
427
- {
428
- deserialize(el) {
429
- if (!isElement(el) || tagName(el) !== "img")
430
- return;
431
- const classNameRaw = el.className;
432
- let className = "";
433
- if (typeof classNameRaw == "string" ? className = classNameRaw : classNameRaw && typeof classNameRaw == "object" && (className = classNameRaw.baseVal || ""), !className.includes("WACImage"))
434
- return;
435
- const src = el.getAttribute("src") ?? void 0, alt = el.getAttribute("alt") ?? void 0, props = Object.fromEntries(
436
- Array.from(el.attributes).map((attr) => [attr.name, attr.value])
437
- ), image = options.matchers?.image?.({
438
- context: {
439
- schema,
440
- keyGenerator: options.keyGenerator ?? keyGenerator
441
- },
442
- props: {
443
- ...props,
444
- ...src ? { src } : {},
445
- ...alt ? { alt } : {}
446
- }
447
- });
448
- if (image)
449
- return {
450
- _type: "__block",
451
- block: image
452
- };
453
- }
454
- },
455
- // Image rule - handles Word Online images wrapped in WACImageContainer
456
- {
457
- deserialize(el) {
458
- if (!isElement(el))
459
- return;
460
- const classNameRaw = el.className;
461
- let className = "";
462
- if (typeof classNameRaw == "string" ? className = classNameRaw : classNameRaw && typeof classNameRaw == "object" && (className = classNameRaw.baseVal || ""), !className.includes("WACImageContainer"))
463
- return;
464
- const img = el.querySelector("img");
465
- if (!img)
466
- return;
467
- const src = img.getAttribute("src") ?? void 0, alt = img.getAttribute("alt") ?? void 0, props = Object.fromEntries(
468
- Array.from(img.attributes).map((attr) => [attr.name, attr.value])
469
- ), isInsideListItem = el.closest("li") !== null;
470
- if (el.closest("p") === null || isInsideListItem) {
471
- const inlineImage = options.matchers?.inlineImage?.({
472
- context: {
473
- schema,
474
- keyGenerator: options.keyGenerator ?? keyGenerator
475
- },
476
- props: {
477
- ...props,
478
- ...src ? { src } : {},
479
- ...alt ? { alt } : {}
480
- }
481
- });
482
- if (inlineImage)
483
- return inlineImage;
484
- }
485
- const image = options.matchers?.image?.({
486
- context: {
487
- schema,
488
- keyGenerator: options.keyGenerator ?? keyGenerator
489
- },
490
- props: {
491
- ...props,
492
- ...src ? { src } : {},
493
- ...alt ? { alt } : {}
494
- }
495
- });
496
- if (image)
497
- return {
498
- _type: "__block",
499
- block: image
500
- };
501
- }
502
- },
503
- // List item rule - handles <li> elements with aria-level
504
- {
505
- deserialize(el, next) {
506
- if (!isElement(el) || tagName(el) !== "li")
507
- return;
508
- const ariaLevel = el.getAttribute("data-aria-level");
509
- if (!ariaLevel)
510
- return;
511
- const listItem = tagName(el.parentNode) === "ol" ? "number" : "bullet";
512
- let childNodesToProcess = el.childNodes, blockStyle = "normal";
513
- if (el.childNodes.length === 1 && el.firstChild && isElement(el.firstChild)) {
514
- const childTag = tagName(el.firstChild);
515
- if (childTag && (HTML_BLOCK_TAGS[childTag] || HTML_HEADER_TAGS[childTag] || childTag === "word-online-block")) {
516
- if (childTag === "word-online-block") {
517
- const paraStyle = el.firstChild.getAttribute("data-parastyle"), foundBlockStyle = paraStyle ? mapParaStyleToBlockStyle(schema, paraStyle) : void 0;
518
- foundBlockStyle && (blockStyle = foundBlockStyle);
519
- }
520
- childNodesToProcess = el.firstChild.childNodes;
521
- }
522
- }
523
- const children = next(childNodesToProcess);
524
- let childArray = Array.isArray(children) ? children : [children].filter(Boolean);
525
- for (; childArray.length > 0; ) {
526
- const lastChild = childArray[childArray.length - 1];
527
- if (lastChild && typeof lastChild == "object" && "text" in lastChild) {
528
- const text = lastChild.text.trimEnd();
529
- if (text === "")
530
- childArray = childArray.slice(0, -1);
531
- else if (text !== lastChild.text) {
532
- lastChild.text = text;
533
- break;
534
- } else
535
- break;
536
- } else
537
- break;
538
- }
539
- return {
540
- _type: schema.block.name,
541
- children: childArray,
542
- markDefs: [],
543
- style: blockStyle,
544
- listItem,
545
- level: parseInt(ariaLevel, 10)
546
- };
547
- }
548
- },
549
- // Block style rule - handles paragraph styles like Quote
550
- // The preprocessor wraps grouped NormalTextRun spans in a word-online-block element
551
- {
552
- deserialize(el, next) {
553
- if (!isElement(el))
554
- return;
555
- const paraStyle = el.getAttribute("data-parastyle"), blockStyle = paraStyle ? mapParaStyleToBlockStyle(schema, paraStyle) : void 0;
556
- if (!blockStyle)
557
- return;
558
- const children = next(el.childNodes);
559
- return {
560
- _type: schema.block.name,
561
- style: blockStyle,
562
- markDefs: [],
563
- children: Array.isArray(children) ? children : children ? [children] : []
564
- };
565
- }
566
- },
567
- // TextRun rule
568
- {
569
- deserialize(el) {
570
- if (isWordOnlineTextRun(el)) {
571
- if (!isElement(el) || !el.textContent)
572
- return;
573
- const text = Array.from(el.childNodes).filter(
574
- (node) => isNormalTextRun(node) || isFindHit(node)
575
- ).map((span2) => isElement(span2) ? span2.textContent ?? "" : "").join("");
576
- if (!text)
577
- return;
578
- const span = {
579
- ...DEFAULT_SPAN,
580
- marks: [],
581
- text
582
- };
583
- if (hasStrongFormatting(el) && span.marks.push("strong"), hasEmphasisFormatting(el) && !isInHeading(el) && !isInBlockquote(el) && span.marks.push("em"), hasUnderlineFormatting(el))
584
- if (isElement(el) && el.parentElement && tagName(el.parentElement) === "a") {
585
- const linkElement = el.parentElement;
586
- if (linkElement) {
587
- const prevSibling = linkElement.previousSibling, nextSibling = linkElement.nextSibling, hasPrevUnderline = prevSibling && isElement(prevSibling) && hasUnderlineFormatting(prevSibling), hasNextUnderline = nextSibling && isElement(nextSibling) && hasUnderlineFormatting(nextSibling);
588
- (hasPrevUnderline || hasNextUnderline) && span.marks.push("underline");
589
- }
590
- } else
591
- span.marks.push("underline");
592
- return hasStrikethroughFormatting(el) && span.marks.push("strike-through"), span;
593
- }
594
- }
595
- }
596
- ];
597
- }
598
- const LIST_CONTAINER_TAGS = Object.keys(HTML_LIST_CONTAINER_TAGS);
599
- function isEmphasis$1(el) {
600
- const style = isElement(el) && el.getAttribute("style");
601
- return /font-style\s*:\s*italic/.test(style || "");
602
- }
603
- function isStrong$1(el) {
604
- const style = isElement(el) && el.getAttribute("style");
605
- return /font-weight\s*:\s*700/.test(style || "");
606
- }
607
- function isUnderline$1(el) {
608
- if (!isElement(el) || tagName(el.parentNode) === "a")
609
- return !1;
610
- const style = isElement(el) && el.getAttribute("style");
611
- return /text-decoration\s*:\s*underline/.test(style || "");
612
- }
613
- function isStrikethrough(el) {
614
- const style = isElement(el) && el.getAttribute("style");
615
- return /text-decoration\s*:\s*(?:.*line-through.*;)/.test(style || "");
616
- }
617
- function isGoogleDocs(el) {
618
- return isElement(el) && !!el.getAttribute("data-is-google-docs");
619
- }
620
- function isRootNode(el) {
621
- return isElement(el) && !!el.getAttribute("data-is-root-node");
622
- }
623
- function getListItemStyle$1(el) {
624
- const parentTag = tagName(el.parentNode);
625
- if (!(parentTag && !LIST_CONTAINER_TAGS.includes(parentTag)))
626
- return tagName(el.parentNode) === "ul" ? "bullet" : "number";
627
- }
628
- function getListItemLevel$1(el) {
629
- let level = 0;
630
- if (tagName(el) === "li") {
631
- let parentNode = el.parentNode;
632
- for (; parentNode; ) {
633
- const parentTag = tagName(parentNode);
634
- parentTag && LIST_CONTAINER_TAGS.includes(parentTag) && level++, parentNode = parentNode.parentNode;
635
- }
636
- } else
637
- level = 1;
638
- return level;
639
- }
640
- const blocks = {
641
- ...HTML_BLOCK_TAGS,
642
- ...HTML_HEADER_TAGS
643
- };
644
- function getBlockStyle(schema, el) {
645
- const childTag = tagName(el.firstChild), block = childTag && blocks[childTag];
646
- return block ? schema.styles.some((style) => style.name === block.style) ? block.style : BLOCK_DEFAULT_STYLE : BLOCK_DEFAULT_STYLE;
647
- }
648
- function createGDocsRules(schema) {
649
- return [
650
- {
651
- deserialize(el, next) {
652
- if (isElement(el) && tagName(el) === "span" && isGoogleDocs(el)) {
653
- if (!el.textContent)
654
- return !el.previousSibling && !el.nextSibling && el.setAttribute("data-lonely-child", "true"), next(el.childNodes);
655
- const span = {
656
- ...DEFAULT_SPAN,
657
- marks: [],
658
- text: el.textContent
659
- };
660
- return isStrong$1(el) && span.marks.push("strong"), isUnderline$1(el) && span.marks.push("underline"), isStrikethrough(el) && span.marks.push("strike-through"), isEmphasis$1(el) && span.marks.push("em"), span;
661
- }
662
- }
663
- },
664
- {
665
- deserialize(el, next) {
666
- if (tagName(el) === "li" && isGoogleDocs(el))
667
- return {
668
- ...DEFAULT_BLOCK,
669
- listItem: getListItemStyle$1(el),
670
- level: getListItemLevel$1(el),
671
- style: getBlockStyle(schema, el),
672
- children: next(el.firstChild?.childNodes || [])
673
- };
674
- }
675
- },
676
- {
677
- deserialize(el) {
678
- if (tagName(el) === "br" && isGoogleDocs(el) && isElement(el) && el.classList.contains("apple-interchange-newline"))
679
- return {
680
- ...DEFAULT_SPAN,
681
- text: ""
682
- };
683
- if (tagName(el) === "br" && isGoogleDocs(el) && isElement(el) && el?.parentNode?.textContent === "")
684
- return {
685
- ...DEFAULT_SPAN,
686
- text: ""
687
- };
688
- if (tagName(el) === "br" && isGoogleDocs(el) && isElement(el) && isRootNode(el))
689
- return {
690
- ...DEFAULT_SPAN,
691
- text: ""
692
- };
693
- }
694
- }
695
- ];
696
- }
697
- const whitespaceTextNodeRule = {
698
- deserialize(node) {
699
- return node.nodeName === "#text" && isWhitespaceTextNode(node) ? {
700
- ...DEFAULT_SPAN,
701
- marks: [],
702
- text: (node.textContent ?? "").replace(/\s\s+/g, " ")
703
- } : void 0;
704
- }
705
- };
706
- function isWhitespaceTextNode(node) {
707
- const isWhitespaceOnly = node.nodeType === 3 && (node.textContent || "").replace(/[\r\n]/g, " ").replace(/\s\s+/g, " ") === " ", hasSiblingContext = node.nextSibling && node.nextSibling.nodeType !== 3 && node.previousSibling && node.previousSibling.nodeType !== 3, hasParentSiblingContext = node.parentNode && tagName(node.parentNode) === "span" && !node.nextSibling && !node.previousSibling && node.parentNode.previousSibling && node.parentNode.previousSibling.nodeType !== 3 && node.parentNode.nextSibling && node.parentNode.nextSibling.nodeType !== 3;
708
- return (isWhitespaceOnly && (hasSiblingContext || hasParentSiblingContext) || node.textContent !== " ") && tagName(node.parentNode) !== "body";
709
- }
710
- function resolveListItem(schema, listNodeTagName) {
711
- if (listNodeTagName === "ul" && schema.lists.some((list) => list.name === "bullet"))
712
- return "bullet";
713
- if (listNodeTagName === "ol" && schema.lists.some((list) => list.name === "number"))
714
- return "number";
715
- }
716
- function createHTMLRules(schema, options) {
717
- return [
718
- whitespaceTextNodeRule,
719
- {
720
- // Pre element
721
- deserialize(el) {
722
- if (tagName(el) !== "pre")
723
- return;
724
- const isCodeEnabled = schema.styles.some(
725
- (style) => style.name === "code"
726
- );
727
- return {
728
- _type: "block",
729
- style: "normal",
730
- markDefs: [],
731
- children: [
732
- {
733
- ...DEFAULT_SPAN,
734
- marks: isCodeEnabled ? ["code"] : [],
735
- text: el.textContent || ""
736
- }
737
- ]
738
- };
739
- }
740
- },
741
- // Blockquote element
742
- {
743
- deserialize(el, next) {
744
- if (tagName(el) !== "blockquote")
745
- return;
746
- const blocks2 = {
747
- ...HTML_BLOCK_TAGS,
748
- ...HTML_HEADER_TAGS
749
- };
750
- delete blocks2.blockquote;
751
- const nonBlockquoteBlocks = Object.keys(blocks2), children = [];
752
- return el.childNodes.forEach((node, index) => {
753
- if (el.ownerDocument)
754
- if (node.nodeType === 1 && nonBlockquoteBlocks.includes(
755
- node.localName.toLowerCase()
756
- )) {
757
- const span = el.ownerDocument.createElement("span"), previousChild = children[children.length - 1];
758
- previousChild && previousChild.nodeType === 3 && previousChild.textContent?.trim() && span.appendChild(el.ownerDocument.createTextNode("\r")), node.childNodes.forEach((cn) => {
759
- span.appendChild(cn.cloneNode(!0));
760
- }), index !== el.childNodes.length && span.appendChild(el.ownerDocument.createTextNode("\r")), children.push(span);
761
- } else
762
- children.push(node);
763
- }), {
764
- _type: "block",
765
- style: "blockquote",
766
- markDefs: [],
767
- children: next(children)
768
- };
769
- }
770
- },
771
- // Block elements
772
- {
773
- deserialize(el, next) {
774
- const blocks2 = {
775
- ...HTML_BLOCK_TAGS,
776
- ...HTML_HEADER_TAGS
777
- }, tag = tagName(el);
778
- let block = tag ? blocks2[tag] : void 0;
779
- if (!block)
780
- return;
781
- if (el.parentNode && tagName(el.parentNode) === "li")
782
- return next(el.childNodes);
783
- const blockStyle = block.style;
784
- return schema.styles.some((style) => style.name === blockStyle) || (block = DEFAULT_BLOCK), {
785
- ...block,
786
- children: next(el.childNodes)
787
- };
788
- }
789
- },
790
- // Ignore span tags
791
- {
792
- deserialize(el, next) {
793
- const tag = tagName(el);
794
- if (!(!tag || !(tag in HTML_SPAN_TAGS)))
795
- return next(el.childNodes);
796
- }
797
- },
798
- // Ignore div tags
799
- {
800
- deserialize(el, next) {
801
- if (tagName(el) === "div")
802
- return next(el.childNodes);
803
- }
804
- },
805
- // Ignore list containers
806
- {
807
- deserialize(el, next) {
808
- const tag = tagName(el);
809
- if (!(!tag || !(tag in HTML_LIST_CONTAINER_TAGS)))
810
- return next(el.childNodes);
811
- }
812
- },
813
- // Deal with br's
814
- {
815
- deserialize(el) {
816
- if (tagName(el) === "br")
817
- return {
818
- ...DEFAULT_SPAN,
819
- text: `
820
- `
821
- };
822
- }
823
- },
824
- // Deal with list items
825
- {
826
- deserialize(el, next, block) {
827
- const tag = tagName(el), listItem = tag ? HTML_LIST_ITEM_TAGS[tag] : void 0;
828
- if (!listItem)
829
- return;
830
- const parentTag = tagName(el.parentNode) || "", listTag = HTML_LIST_CONTAINER_TAGS[parentTag] ? parentTag : "ul", enabledListItem = resolveListItem(schema, listTag);
831
- return enabledListItem ? (listItem.listItem = enabledListItem, {
832
- ...listItem,
833
- children: next(el.childNodes)
834
- }) : block({ _type: "block", children: next(el.childNodes) });
835
- }
836
- },
837
- // Deal with decorators - this is a limited set of known html elements that we know how to deserialize
838
- {
839
- deserialize(el, next) {
840
- const decorator = HTML_DECORATOR_TAGS[tagName(el) || ""];
841
- if (!(!decorator || !schema.decorators.some(
842
- (decoratorType) => decoratorType.name === decorator
843
- )))
844
- return {
845
- _type: "__decorator",
846
- name: decorator,
847
- children: next(el.childNodes)
848
- };
849
- }
850
- },
851
- // Special case for hyperlinks, add annotation (if allowed by schema),
852
- // If not supported just write out the link text and href in plain text.
853
- {
854
- deserialize(el, next) {
855
- if (tagName(el) !== "a")
856
- return;
857
- const linkEnabled = schema.annotations.some(
858
- (annotation) => annotation.name === "link"
859
- ), href = isElement(el) && el.getAttribute("href");
860
- return href ? linkEnabled ? {
861
- _type: "__annotation",
862
- markDef: {
863
- _key: options.keyGenerator ? options.keyGenerator() : keyGenerator(),
864
- _type: "link",
865
- href
866
- },
867
- children: next(el.childNodes)
868
- } : el.appendChild(el.ownerDocument.createTextNode(` (${href})`)) && next(el.childNodes) : next(el.childNodes);
869
- }
870
- },
871
- {
872
- deserialize(el, next) {
873
- if (isElement(el) && (tagName(el) === "td" || tagName(el) === "th"))
874
- return {
875
- ...DEFAULT_BLOCK,
876
- children: next(el.childNodes)
877
- };
878
- }
879
- },
880
- {
881
- deserialize(el) {
882
- if (isElement(el) && tagName(el) === "img") {
883
- const src = el.getAttribute("src") ?? void 0, alt = el.getAttribute("alt") ?? void 0, props = Object.fromEntries(
884
- Array.from(el.attributes).map((attr) => [attr.name, attr.value])
885
- ), ancestorOfLonelyChild = el?.parentElement?.parentElement?.getAttribute("data-lonely-child"), ancestorOfListItem = el.closest("li") !== null;
886
- if (ancestorOfLonelyChild && !ancestorOfListItem) {
887
- const image2 = options.matchers?.image?.({
888
- context: {
889
- schema,
890
- keyGenerator: options.keyGenerator ?? keyGenerator
891
- },
892
- props: {
893
- ...props,
894
- ...src ? { src } : {},
895
- ...alt ? { alt } : {}
896
- }
897
- });
898
- if (image2)
899
- return {
900
- _type: "__block",
901
- block: image2
902
- };
903
- }
904
- const inlineImage = options.matchers?.inlineImage?.({
905
- context: {
906
- schema,
907
- keyGenerator: options.keyGenerator ?? keyGenerator
908
- },
909
- props: {
910
- ...props,
911
- ...src ? { src } : {},
912
- ...alt ? { alt } : {}
913
- }
914
- });
915
- if (inlineImage)
916
- return inlineImage;
917
- const image = options.matchers?.image?.({
918
- context: {
919
- schema,
920
- keyGenerator: options.keyGenerator ?? keyGenerator
921
- },
922
- props: {
923
- ...props,
924
- ...src ? { src } : {},
925
- ...alt ? { alt } : {}
926
- }
927
- });
928
- if (image)
929
- return {
930
- _type: "__block",
931
- block: image
932
- };
933
- }
934
- }
935
- }
936
- ];
937
- }
938
/**
 * Checks whether a node carries an inline italic declaration.
 *
 * @param el - Node to inspect; anything that is not an element yields `false`
 * @returns `true` when the `style` attribute contains `font-style:italic`
 */
function isEmphasis(el) {
  const inlineStyle = isElement(el) ? el.getAttribute("style") : "";
  return /font-style:italic/.test(inlineStyle || "");
}
942
/**
 * Checks whether a node carries an inline bold declaration.
 *
 * @param el - Node to inspect; anything that is not an element yields `false`
 * @returns `true` when the `style` attribute contains `font-weight:700`
 *   or `font-weight:600`
 */
function isStrong(el) {
  const inlineStyle = isElement(el) ? el.getAttribute("style") : "";
  return /font-weight:(?:700|600)/.test(inlineStyle || "");
}
946
/**
 * Checks whether a node carries an inline underline declaration.
 *
 * @param el - Node to inspect; anything that is not an element yields `false`
 * @returns `true` when the `style` attribute contains `text-decoration:underline`
 */
function isUnderline(el) {
  const inlineStyle = isElement(el) ? el.getAttribute("style") : "";
  return /text-decoration:underline/.test(inlineStyle || "");
}
950
/**
 * Checks whether an element has been tagged with a `data-is-notion` attribute.
 *
 * @param el - Node to inspect
 * @returns `true` when `el` is an element with a non-empty `data-is-notion`
 *   attribute; otherwise a falsy value
 */
function isNotion(el) {
  return isElement(el) && Boolean(el.getAttribute("data-is-notion"));
}
953
/**
 * Builds the deserialization rules for Notion-flagged markup.
 *
 * The single rule turns a `<span>` for which `isNotion` holds into a span
 * whose marks are derived from the element's inline style.
 *
 * @returns Array containing one rule object with a `deserialize(el)` method
 */
function createNotionRules() {
  const notionSpanRule = {
    deserialize(el) {
      if (!isElement(el) || tagName(el) !== "span" || !isNotion(el)) {
        return undefined;
      }
      const span = {
        ...DEFAULT_SPAN,
        marks: [],
        text: el.textContent
      };
      // Mark order is fixed: strong, underline, em.
      if (isStrong(el)) {
        span.marks.push("strong");
      }
      if (isUnderline(el)) {
        span.marks.push("underline");
      }
      if (isEmphasis(el)) {
        span.marks.push("em");
      }
      return span;
    }
  };
  return [notionSpanRule];
}
969
/**
 * Reads the list-style lookup stored as JSON in the `data-word-list-styles`
 * attribute of the enclosing `<body>` (falling back to `ownerDocument.body`).
 *
 * @param el - Node whose document body is inspected
 * @returns The parsed lookup object, or `{}` when `el` is not an element, no
 *   body or attribute is found, the JSON is malformed, or the JSON does not
 *   describe a plain object
 */
function getListStyleMap(el) {
  if (!isElement(el)) {
    return {};
  }
  const body = el.closest("body") || el.ownerDocument?.body;
  if (!body) {
    return {};
  }
  const data = body.getAttribute("data-word-list-styles");
  if (!data) {
    return {};
  }
  try {
    const parsed = JSON.parse(data);
    // Valid JSON such as `null`, `42` or `[]` is not a usable lookup table;
    // returning it would make callers index into a non-object (and throw on null).
    const isLookup = parsed !== null && typeof parsed === "object" && !Array.isArray(parsed);
    return isLookup ? parsed : {};
  } catch {
    // Malformed JSON is treated the same as a missing attribute.
    return {};
  }
}
984
/**
 * Resolves the list style for an element carrying an `mso-list` declaration.
 *
 * The list id and level parts of the declaration form the `id:level` key that
 * is looked up in the map returned by `getListStyleMap`.
 *
 * @param el - Node to inspect
 * @returns The mapped style, `"bullet"` when the key has no truthy entry, or
 *   `undefined` when there is no `style` attribute or no `mso-list` declaration
 */
function getListItemStyle(el) {
  const inlineStyle = isElement(el) && el.getAttribute("style");
  if (!inlineStyle) {
    return undefined;
  }
  const declaration = inlineStyle.match(/mso-list:\s*(l\d+)\s+(level\d+)\s+lfo\d+/);
  if (!declaration) {
    return undefined;
  }
  const [, listId, levelId] = declaration;
  const styles = getListStyleMap(el);
  return styles[`${listId}:${levelId}`] || "bullet";
}
994
/**
 * Extracts the list nesting level from a `levelN` token in an element's
 * inline style.
 *
 * @param el - Node to inspect
 * @returns The numeric level (at least 1), or `undefined` when the node is not
 *   an element, has no `style` attribute, or the style has no `levelN` token
 */
function getListItemLevel(el) {
  const style = isElement(el) && el.getAttribute("style");
  if (!style) {
    return undefined;
  }
  // Capture every digit of the level; reading a single digit would turn
  // `level10` into 1.
  const levelMatch = /level(\d+)/.exec(style);
  if (!levelMatch) {
    return undefined;
  }
  // A parsed value of 0 falls back to the first level.
  return Number.parseInt(levelMatch[1], 10) || 1;
}
1004
/**
 * Decides whether an element is marked up as a Word list paragraph.
 *
 * An element qualifies when its class name is exactly one of the
 * `MsoListParagraphCxSp*` names, or when its inline style contains an
 * `mso-list: lN levelN lfoN` declaration.
 *
 * @param el - Node to inspect
 * @returns `true` for a Word list element, `false` otherwise
 */
function isWordListElement(el) {
  if (!isElement(el)) {
    return false;
  }
  const wordListClasses = [
    "MsoListParagraphCxSpFirst",
    "MsoListParagraphCxSpMiddle",
    "MsoListParagraphCxSpLast"
  ];
  if (el.className && wordListClasses.includes(el.className)) {
    return true;
  }
  const inlineStyle = el.getAttribute("style");
  return Boolean(inlineStyle) && /mso-list:\s*l\d+\s+level\d+\s+lfo\d+/.test(inlineStyle);
}
1012
/**
 * Looks up the block style registered for a heading tag.
 *
 * @param el - Node whose tag name is looked up in `HTML_HEADER_TAGS`
 * @returns The `style` of the matching entry, or `undefined` when the node has
 *   no tag name or the tag has no truthy entry
 */
function getHeadingStyle(el) {
  const tag = tagName(el);
  const heading = tag ? HTML_HEADER_TAGS[tag] : undefined;
  return heading ? heading.style : undefined;
}
1017
/**
 * Builds the deserialization rules for Word list markup.
 *
 * The single rule converts a `<p>` or heading element for which
 * `isWordListElement` holds into a list-item block. Its `listItem` and
 * `level` come from `getListItemStyle` and `getListItemLevel`; its style is
 * the heading style when one applies, otherwise `BLOCK_DEFAULT_STYLE`.
 *
 * @returns Array containing one rule object with a `deserialize(el, next)` method
 */
function createWordRules() {
  const wordListRule = {
    deserialize(el, next) {
      const tag = tagName(el);
      const isParagraphOrHeading = tag === "p" || HTML_HEADER_TAGS[tag || ""];
      if (!isParagraphOrHeading || !isWordListElement(el)) {
        return undefined;
      }
      const headingStyle = getHeadingStyle(el);
      return {
        ...DEFAULT_BLOCK,
        listItem: getListItemStyle(el),
        level: getListItemLevel(el),
        style: headingStyle || BLOCK_DEFAULT_STYLE,
        children: next(el.childNodes)
      };
    }
  };
  return [wordListRule];
}
1036
/**
 * Assembles the standard rule list from every source-specific rule factory.
 *
 * Rules are concatenated in this order: Word, Word Online, Notion,
 * Google Docs, generic HTML.
 *
 * @param schema - Schema handed to the factories that accept one
 * @param options - Options handed to the Word Online and HTML factories
 * @returns Flat array of rule objects
 */
function createRules(schema, options) {
  const ruleSets = [
    createWordRules(),
    createWordOnlineRules(schema, options),
    createNotionRules(),
    createGDocsRules(schema),
    createHTMLRules(schema, options)
  ];
  return ruleSets.flat();
}
1045
/**
 * Trims whitespace inside text blocks and filters empty text blocks
 * according to `mode`.
 *
 * - `"preserve"`: every block is kept.
 * - `"remove"`: empty text blocks are dropped.
 * - `"normalize"`: a run of consecutive empty text blocks collapses to its
 *   first block.
 *
 * Any other mode keeps nothing.
 *
 * @param context - Context forwarded to `isTextBlock` / `isEmptyTextBlock`
 * @param mode - One of the modes listed above
 * @param blocks2 - Blocks to process
 * @returns New array with the retained blocks
 */
function trimWhitespace(context, mode, blocks2) {
  const kept = [];
  let emptyRun = 0;
  for (const block of blocks2) {
    const candidate = isTextBlock(context, block) ? trimTextBlockWhitespace(block) : block;
    switch (mode) {
      case "preserve":
        kept.push(candidate);
        break;
      case "remove":
        if (!isEmptyTextBlock(context, candidate)) {
          kept.push(candidate);
        }
        break;
      case "normalize":
        if (isEmptyTextBlock(context, candidate)) {
          emptyRun += 1;
          if (emptyRun === 1) {
            kept.push(candidate);
          }
        } else {
          kept.push(candidate);
          emptyRun = 0;
        }
        break;
      default:
        // Unrecognised modes retain no blocks.
        break;
    }
  }
  return kept;
}
1070
/**
 * Tells whether a block is a text block made up solely of blank spans.
 *
 * @param context - Context forwarded to `isTextBlock` and `isSpan`
 * @param block - Block to inspect
 * @returns `true` when `block` is a text block and every child is a span
 *   whose text is empty after trimming; `false` otherwise
 */
function isEmptyTextBlock(context, block) {
  if (!isTextBlock(context, block)) {
    return false;
  }
  return block.children.every(
    (child) => isSpan(context, child) && child.text.trim() === ""
  );
}
1075
/**
 * Normalizes whitespace between the spans of a text block, in place.
 *
 * For each minimal span: horizontal whitespace is stripped from the very
 * start and very end of the block, doubled whitespace across a span boundary
 * is reduced, spans left empty are removed, and a span that is exactly one
 * space (or non-breaking space) is folded into a neighbouring span with equal
 * marks. Newlines are never stripped.
 *
 * @param block - Text block whose `children` array and span texts are mutated
 * @returns The same `block` instance
 */
function trimTextBlockWhitespace(block) {
  const stripLeading = (text) => text.replace(/^[^\S\n]+/g, "");
  const stripTrailing = (text) => text.replace(/[^\S\n]+$/g, "");
  const startsWithSpace = (text) => /\s/.test(text.slice(0, 1));
  const endsWithSpace = (text) => /\s/.test(text.slice(-1));

  // `position` advances once per visited child, including after a removal,
  // mirroring how the array iterator itself moves on.
  let position = 0;
  for (const child of block.children) {
    if (!isMinimalSpan(child)) {
      position += 1;
      continue;
    }
    const following = nextSpan(block, position);
    const preceding = prevSpan(block, position);

    if (position === 0) {
      child.text = stripLeading(child.text);
    }
    if (position === block.children.length - 1) {
      child.text = stripTrailing(child.text);
    }
    if (
      endsWithSpace(child.text) &&
      following &&
      isMinimalSpan(following) &&
      startsWithSpace(following.text)
    ) {
      child.text = stripTrailing(child.text);
    }
    if (
      startsWithSpace(child.text) &&
      preceding &&
      isMinimalSpan(preceding) &&
      endsWithSpace(preceding.text)
    ) {
      child.text = stripLeading(child.text);
    }
    if (!child.text) {
      block.children.splice(position, 1);
    }

    const isLoneSpace = () => isWhiteSpaceChar(child.text);
    if (
      preceding &&
      Array.isArray(preceding.marks) &&
      isEqualMarks(preceding.marks, child.marks) &&
      isLoneSpace()
    ) {
      preceding.text += " ";
      block.children.splice(position, 1);
    } else if (
      following &&
      Array.isArray(following.marks) &&
      isEqualMarks(following.marks, child.marks) &&
      isLoneSpace()
    ) {
      following.text = ` ${following.text}`;
      block.children.splice(position, 1);
    }
    position += 1;
  }
  return block;
}
1087
/**
 * Returns the child directly after `index` when it is a span.
 *
 * @param block - Block whose `children` are inspected
 * @param index - Position of the current child
 * @returns The following child if its `_type` is `"span"`, otherwise `null`
 */
function nextSpan(block, index) {
  const candidate = block.children[index + 1];
  if (candidate && candidate._type === "span") {
    return candidate;
  }
  return null;
}
1091
/**
 * Returns the child directly before `index` when it is a span.
 *
 * @param block - Block whose `children` are inspected
 * @param index - Position of the current child
 * @returns The preceding child if its `_type` is `"span"`, otherwise `null`
 */
function prevSpan(block, index) {
  const candidate = block.children[index - 1];
  if (candidate && candidate._type === "span") {
    return candidate;
  }
  return null;
}
1095
/**
 * Tells whether `text` is exactly one space or one non-breaking space.
 *
 * @param text - Value to compare
 * @returns `true` only for `" "` and `"\xA0"`
 */
function isWhiteSpaceChar(text) {
  return text === "\xA0" || text === " ";
}
1098
/**
 * Deserializes an HTML string into an array of blocks by running every DOM
 * node through an ordered list of rules (caller-supplied rules first, then
 * the standard rules from `createRules`).
 */
class HtmlDeserializer {
  keyGenerator;
  schema;
  rules;
  parseHtml;
  whitespaceMode;
  // Mark definitions collected from annotations during one `deserialize` call.
  _markDefs = [];

  /**
   * Create a new deserializer for the given schema.
   *
   * @param schema - Schema passed to the rule factories and block helpers
   * @param options - Options for the deserialization process: `rules`
   *   (extra rules, tried before the standard ones), `keyGenerator`,
   *   `matchers`, `parseHtml` and `unstable_whitespaceOnPasteMode`
   *   (defaults to `"preserve"`)
   */
  constructor(schema, options = {}) {
    const { rules = [], unstable_whitespaceOnPasteMode = "preserve" } = options;
    const standardRules = createRules(schema, {
      keyGenerator: options.keyGenerator,
      matchers: options.matchers
    });
    this.schema = schema;
    this.keyGenerator = options.keyGenerator ?? keyGenerator;
    this.rules = [...rules, ...standardRules];
    this.whitespaceMode = unstable_whitespaceOnPasteMode;
    const parseHtml = options.parseHtml || defaultParseHtml();
    this.parseHtml = (html) => {
      const cleanHTML = M(html);
      const doc = parseHtml(cleanHTML);
      for (const processor of preprocessors) {
        processor(cleanHTML, doc);
      }
      return doc.body;
    };
  }

  /**
   * Deserialize HTML.
   *
   * @param html - The HTML to deserialize, as a string
   * @returns Array of blocks - either portable text blocks or other allowed blocks
   */
  deserialize = (html) => {
    this._markDefs = [];
    const { parseHtml } = this;
    const fragment = parseHtml(html);
    const children = Array.from(fragment.childNodes);
    const context = { schema: this.schema };
    const blocks2 = trimWhitespace(
      context,
      this.whitespaceMode,
      flattenNestedBlocks(
        { schema: this.schema },
        ensureRootIsBlocks(this.schema, this.deserializeElements(children))
      )
    );
    if (this._markDefs.length > 0) {
      for (const block of blocks2) {
        if (!isTextBlock({ schema: this.schema }, block)) {
          continue;
        }
        // Collect the marks used by this block once, then attach only the
        // mark definitions that are actually referenced.
        const usedMarks = new Set(block.children.flatMap((child) => child.marks || []));
        block.markDefs = (block.markDefs || []).concat(
          this._markDefs.filter((def) => usedMarks.has(def._key))
        );
      }
    }
    return blocks2.map((block) => {
      // Rules emit the generic "block" type; swap in the schema's block name.
      if (block._type === "block") {
        block._type = this.schema.block.name;
      }
      return block;
    });
  };

  /**
   * Deserialize an array of DOM elements.
   *
   * @param elements - Array of DOM elements to deserialize
   * @returns Flat array with the results of every element, in order
   */
  deserializeElements = (elements = []) => {
    let nodes = [];
    elements.forEach((element) => {
      nodes = nodes.concat(this.deserializeElement(element));
    });
    return nodes;
  };

  /**
   * Deserialize a DOM element using the first rule that returns a value.
   *
   * @param element - DOM element to deserialize
   * @returns The deserialized node(s); when no rule matches, the result of
   *   deserializing the element's child nodes, or `[]`
   * @throws Error when a rule returns `null` or a value that is neither an
   *   array, an object nor `undefined`
   */
  deserializeElement = (element) => {
    const next = (elements) => {
      if (isNodeList(elements)) {
        return this.deserializeElements(Array.from(elements));
      }
      if (Array.isArray(elements)) {
        return this.deserializeElements(elements);
      }
      if (elements) {
        return this.deserializeElement(elements);
      }
      return undefined;
    };
    const block = (props) => ({
      _type: "__block",
      block: props
    });
    let node;
    for (const rule of this.rules) {
      if (!rule.deserialize) {
        continue;
      }
      const ret = rule.deserialize(element, next, block);
      const type = resolveJsType(ret);
      if (type !== "array" && type !== "object" && type !== "null" && type !== "undefined") {
        // Report the offending return value itself; `node` is still unset here.
        throw new Error(
          `A rule returned an invalid deserialized representation: "${String(ret)}".`
        );
      }
      if (ret === undefined) {
        continue;
      }
      if (ret === null) {
        throw new Error("Deserializer rule returned `null`");
      }
      if (Array.isArray(ret)) {
        node = ret;
      } else if (isPlaceholderDecorator(ret)) {
        node = this.deserializeDecorator(ret);
      } else if (isPlaceholderAnnotation(ret)) {
        node = this.deserializeAnnotation(ret);
      } else {
        node = ret;
      }
      if (ret && !Array.isArray(ret) && isMinimalBlock(ret) && "listItem" in ret) {
        // Each enclosing <li> (two parent hops up) adds one nesting level.
        let parent = element.parentNode?.parentNode;
        while (parent && tagName(parent) === "li") {
          parent = parent.parentNode?.parentNode;
          ret.level = ret.level ? ret.level + 1 : 1;
        }
      }
      if (ret && !Array.isArray(ret) && isMinimalBlock(ret) && ret.style === "blockquote") {
        // "\r" spans are line-break placeholders: they become "\n", and are
        // dropped at the first/last position.
        // NOTE(review): splicing inside forEach skips the child that follows a
        // removed one; kept as-is to preserve existing output.
        ret.children.forEach((child, index) => {
          if (isMinimalSpan(child) && child.text === "\r") {
            child.text = "\n";
            if (index === 0 || index === ret.children.length - 1) {
              ret.children.splice(index, 1);
            }
          }
        });
      }
      break;
    }
    return node || next(element.childNodes) || [];
  };

  /**
   * Deserialize a `__decorator` type
   * (an internal made up type to process decorators exclusively)
   *
   * @param decorator - Placeholder with the decorator `name` and its `children`
   * @returns Flat array of the children, with `name` prepended to the marks of
   *   every span that has non-blank text
   */
  deserializeDecorator = (decorator) => {
    const { name } = decorator;
    const applyDecorator = (node) => {
      if (isPlaceholderDecorator(node)) {
        return this.deserializeDecorator(node);
      }
      if (isMinimalSpan(node)) {
        node.marks = node.marks || [];
        if (node.text.trim()) {
          node.marks.unshift(name);
        }
      } else if ("children" in node && Array.isArray(node.children)) {
        node.children = node.children.map(applyDecorator);
      }
      return node;
    };
    return decorator.children.reduce((children, node) => {
      const ret = applyDecorator(node);
      if (Array.isArray(ret)) {
        return children.concat(ret);
      }
      children.push(ret);
      return children;
    }, []);
  };

  /**
   * Deserialize a `__annotation` object.
   * (an internal made up type to process annotations exclusively)
   *
   * Records the annotation's `markDef` so `deserialize` can attach it to the
   * blocks that reference it.
   *
   * @param annotation - Placeholder with a `markDef` and its `children`
   * @returns Flat array of the children, with the markDef key prepended to the
   *   marks of every span that has non-blank text
   */
  deserializeAnnotation = (annotation) => {
    const { markDef } = annotation;
    this._markDefs.push(markDef);
    const applyAnnotation = (node) => {
      if (isPlaceholderAnnotation(node)) {
        return this.deserializeAnnotation(node);
      }
      if (isMinimalSpan(node)) {
        node.marks = node.marks || [];
        if (node.text.trim()) {
          node.marks.unshift(markDef._key);
        }
      } else if ("children" in node && Array.isArray(node.children)) {
        node.children = node.children.map(applyAnnotation);
      }
      return node;
    };
    return annotation.children.reduce((children, node) => {
      const ret = applyAnnotation(node);
      if (Array.isArray(ret)) {
        return children.concat(ret);
      }
      children.push(ret);
      return children;
    }, []);
  };
}
1261
27
  function normalizeBlock(node, options = {}) {
1262
28
  const schema = {
1263
29
  block: {
@@ -1313,7 +79,27 @@ function normalizeBlock(node, options = {}) {
1313
79
  }
1314
80
/**
 * Converts an HTML string to blocks by delegating to `htmlToPortableText`.
 *
 * @param html - The HTML to convert
 * @param schemaType - Either a Sanity schema (converted with
 *   `sanitySchemaToPortableTextSchema`) or a schema that is used as-is
 * @param options - Optional `keyGenerator`, `parseHtml`, `rules`,
 *   `unstable_whitespaceOnPasteMode` (forwarded as `whitespaceMode`) and
 *   `matchers` (adapted with `adaptMatchers` and forwarded as `types`)
 * @returns Whatever `htmlToPortableText` returns for the given input
 */
function htmlToBlocks(html, schemaType, options = {}) {
  const schema = isSanitySchema(schemaType)
    ? sanitySchemaToPortableTextSchema(schemaType)
    : schemaType;
  const {
    keyGenerator,
    parseHtml,
    rules,
    unstable_whitespaceOnPasteMode: whitespaceMode,
    matchers
  } = options;
  return htmlToPortableText(html, {
    schema,
    keyGenerator,
    parseHtml,
    rules,
    whitespaceMode,
    types: adaptMatchers(matchers)
  });
}
91
/**
 * Wraps the `image` / `inlineImage` matchers in a single `image` handler.
 *
 * The handler picks `inlineImage` when `isInline` is truthy and `image`
 * otherwise, and calls it with `{ context, props: value }`.
 *
 * @param matchers - Optional object with `image` and/or `inlineImage` functions
 * @returns `{ image }` when at least one matcher is present, otherwise
 *   `undefined`. The handler returns the matcher's result when truthy and
 *   `undefined` when the selected matcher is missing or returns a falsy value.
 */
function adaptMatchers(matchers) {
  const hasImageMatcher = Boolean(matchers?.image || matchers?.inlineImage);
  if (!hasImageMatcher) {
    return undefined;
  }
  return {
    image: ({ context, value, isInline }) => {
      const matcher = isInline ? matchers.inlineImage : matchers.image;
      const result = matcher ? matcher({ context, props: value }) : undefined;
      return result || undefined;
    }
  };
}
1318
104
  function isSanitySchema(schema) {
1319
105
  return schema.hasOwnProperty("jsonType");