@portabletext/html 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js ADDED
@@ -0,0 +1,1448 @@
1
+ import { compileSchema, defineSchema, isTextBlock, isSpan } from "@portabletext/schema";
2
+ import { vercelStegaClean } from "@vercel/stega";
3
+ import { isElement, tagName, PRESERVE_WHITESPACE_TAGS, HTML_BLOCK_TAGS, HTML_HEADER_TAGS, DEFAULT_SPAN, DEFAULT_BLOCK, BLOCK_DEFAULT_STYLE, HTML_LIST_CONTAINER_TAGS, HTML_SPAN_TAGS, HTML_LIST_ITEM_TAGS, HTML_DECORATOR_TAGS, isMinimalSpan, isEqualMarks, defaultParseHtml, flattenNestedBlocks, ensureRootIsBlocks, resolveJsType, isPlaceholderDecorator, isPlaceholderAnnotation, isMinimalBlock, isNodeList } from "./_chunks-es/helpers.js";
4
// Block styles available in the default schema.
const normalStyleDefinition = { name: "normal" };
const h1StyleDefinition = { name: "h1" };
const h2StyleDefinition = { name: "h2" };
const h3StyleDefinition = { name: "h3" };
const h4StyleDefinition = { name: "h4" };
const h5StyleDefinition = { name: "h5" };
const h6StyleDefinition = { name: "h6" };
const blockquoteStyleDefinition = { name: "blockquote" };

// List item kinds.
const defaultOrderedListItemDefinition = { name: "number" };
const defaultUnorderedListItemDefinition = { name: "bullet" };

// Inline decorators.
const defaultStrongDecoratorDefinition = { name: "strong" };
const defaultEmDecoratorDefinition = { name: "em" };
const defaultCodeDecoratorDefinition = { name: "code" };
const defaultStrikeThroughDecoratorDefinition = { name: "strike-through" };

// Annotation and object definitions, each with its field list.
const defaultLinkObjectDefinition = {
  name: "link",
  fields: [
    { name: "href", type: "string" },
    { name: "title", type: "string" },
  ],
};
const defaultCodeObjectDefinition = {
  name: "code",
  fields: [
    { name: "language", type: "string" },
    { name: "code", type: "string" },
  ],
};
const defaultImageObjectDefinition = {
  name: "image",
  fields: [
    { name: "src", type: "string" },
    { name: "alt", type: "string" },
    { name: "title", type: "string" },
  ],
};
const defaultHorizontalRuleObjectDefinition = { name: "horizontal-rule" };
const defaultHtmlObjectDefinition = {
  name: "html",
  fields: [{ name: "html", type: "string" }],
};
const defaultTableObjectDefinition = {
  name: "table",
  fields: [
    { name: "headerRows", type: "number" },
    { name: "rows", type: "array" },
  ],
};

// Compiled schema assembled from the definitions above. The image definition
// is registered both as a block object and as an inline object.
const defaultSchema = compileSchema(
  defineSchema({
    styles: [
      normalStyleDefinition,
      h1StyleDefinition,
      h2StyleDefinition,
      h3StyleDefinition,
      h4StyleDefinition,
      h5StyleDefinition,
      h6StyleDefinition,
      blockquoteStyleDefinition,
    ],
    lists: [
      defaultOrderedListItemDefinition,
      defaultUnorderedListItemDefinition,
    ],
    decorators: [
      defaultStrongDecoratorDefinition,
      defaultEmDecoratorDefinition,
      defaultCodeDecoratorDefinition,
      defaultStrikeThroughDecoratorDefinition,
    ],
    annotations: [defaultLinkObjectDefinition],
    blockObjects: [
      defaultCodeObjectDefinition,
      defaultHorizontalRuleObjectDefinition,
      defaultImageObjectDefinition,
      defaultHtmlObjectDefinition,
      defaultTableObjectDefinition,
    ],
    inlineObjects: [defaultImageObjectDefinition],
  }),
);
95
/**
 * Detects Word Online markup in a raw HTML string.
 *
 * @param {string} html - Raw HTML source.
 * @returns {boolean} True when the string contains a TextRun/NormalTextRun
 *   class attribute carrying SCXW and BCX tokens, or an EOP class attribute
 *   carrying an SCXW token.
 */
function isWordOnlineHtml(html) {
  const textRunPattern = /class="(?:TextRun|NormalTextRun)[^"]*SCXW\d+[^"]*BCX\d+/;
  const endOfParagraphPattern = /class="EOP[^"]*SCXW\d+/;
  if (textRunPattern.test(html)) {
    return true;
  }
  return endOfParagraphPattern.test(html);
}
98
/**
 * Checks for a <span> with class "TextRun" that is not also class "EOP".
 *
 * @param {*} el - Candidate node.
 * @returns {boolean}
 */
function isWordOnlineTextRun(el) {
  if (!isElement(el) || tagName(el) !== "span") {
    return false;
  }
  const { classList } = el;
  return classList.contains("TextRun") && !classList.contains("EOP");
}
101
/**
 * Checks for a <span> carrying the "NormalTextRun" class.
 *
 * @param {*} el - Candidate node.
 * @returns {boolean}
 */
function isNormalTextRun(el) {
  if (!isElement(el) || tagName(el) !== "span") {
    return false;
  }
  return el.classList.contains("NormalTextRun");
}
104
/**
 * Checks for a <span> with class "TextRun" that carries neither the
 * "NormalTextRun" nor the "EOP" class.
 *
 * @param {*} el - Candidate node.
 * @returns {boolean}
 */
function isTextRunSpan(el) {
  if (!isElement(el) || tagName(el) !== "span") {
    return false;
  }
  const { classList } = el;
  if (!classList.contains("TextRun")) {
    return false;
  }
  return !classList.contains("NormalTextRun") && !classList.contains("EOP");
}
107
/**
 * Checks for a <span> carrying the "FindHit" class.
 *
 * @param {*} el - Candidate node.
 * @returns {boolean}
 */
function isFindHit(el) {
  if (!isElement(el) || tagName(el) !== "span") {
    return false;
  }
  return el.classList.contains("FindHit");
}
110
/**
 * Walks from `el` up through its ancestors looking for a
 * <word-online-block> whose data-parastyle is "heading N" (single digit).
 *
 * @param {*} el - Starting node (itself included in the search).
 * @returns {boolean} True when such a wrapper is found.
 */
function isInHeading(el) {
  const headingStyle = /^heading \d$/;
  for (let node = el; node; node = node.parentNode) {
    if (!isElement(node) || tagName(node) !== "word-online-block") {
      continue;
    }
    if (headingStyle.test(node.getAttribute("data-parastyle") ?? "")) {
      return true;
    }
  }
  return false;
}
119
/**
 * Walks from `el` up through its ancestors looking for a
 * <word-online-block> whose data-parastyle is exactly "Quote".
 *
 * @param {*} el - Starting node (itself included in the search).
 * @returns {boolean} True when such a wrapper is found.
 */
function isInBlockquote(el) {
  for (let node = el; node; node = node.parentNode) {
    const isQuoteWrapper =
      isElement(node) &&
      tagName(node) === "word-online-block" &&
      node.getAttribute("data-parastyle") === "Quote";
    if (isQuoteWrapper) {
      return true;
    }
  }
  return false;
}
128
/**
 * Reports bold formatting: either the "MacChromeBold" class or an inline
 * `font-weight: bold` style.
 *
 * @param {Element} el - Element to inspect.
 * @returns {boolean}
 */
function hasStrongFormatting(el) {
  const inlineStyle = el.getAttribute("style") ?? "";
  if (el.classList.contains("MacChromeBold")) {
    return true;
  }
  return /font-weight\s*:\s*bold/.test(inlineStyle);
}
132
/**
 * Reports italic formatting via an inline `font-style: italic` style.
 *
 * @param {Element} el - Element to inspect.
 * @returns {boolean}
 */
function hasEmphasisFormatting(el) {
  const inlineStyle = el.getAttribute("style") ?? "";
  const italicPattern = /font-style\s*:\s*italic/;
  return italicPattern.test(inlineStyle);
}
136
/**
 * Reports underline formatting: either the "Underlined" class or an inline
 * `text-decoration: underline` style.
 *
 * @param {Element} el - Element to inspect.
 * @returns {boolean}
 */
function hasUnderlineFormatting(el) {
  const inlineStyle = el.getAttribute("style") ?? "";
  if (el.classList.contains("Underlined")) {
    return true;
  }
  return /text-decoration\s*:\s*underline/.test(inlineStyle);
}
140
/**
 * Reports strikethrough formatting: either the "Strikethrough" class or an
 * inline `text-decoration: line-through` style.
 *
 * @param {Element} el - Element to inspect.
 * @returns {boolean}
 */
function hasStrikethroughFormatting(el) {
  const inlineStyle = el.getAttribute("style") ?? "";
  if (el.classList.contains("Strikethrough")) {
    return true;
  }
  return /text-decoration\s*:\s*line-through/.test(inlineStyle);
}
144
/**
 * Reports whether the element has any of the recognised formatting kinds
 * (strong, emphasis, underline, strikethrough), checked in that order.
 *
 * @param {Element} el - Element to inspect.
 * @returns {boolean}
 */
function hasFormatting(el) {
  const checks = [
    hasStrongFormatting,
    hasEmphasisFormatting,
    hasUnderlineFormatting,
    hasStrikethroughFormatting,
  ];
  return checks.some((check) => check(el));
}
147
/**
 * Rewrites a parsed Word Online document in place so later rules can read it.
 *
 * Steps, in order:
 *  1. Each `p.Paragraph[role="heading"]` with an aria-level is replaced by a
 *     <word-online-block data-parastyle="heading N"> holding its children.
 *  2. Runs of adjacent body-level span elements sharing a paragraph style are
 *     grouped under a <word-online-block> carrying that style.
 *  3. Whitespace-only spans nested inside NormalTextRun spans are removed and
 *     their text (non-breaking spaces converted to plain spaces) is merged
 *     into neighbouring text.
 *
 * @param {string} html - Raw HTML, used only to detect Word Online content.
 * @param {Document} doc - Parsed document; mutated in place.
 * @returns {Document} The same `doc`.
 */
function preprocessWordOnline(html, doc) {
  if (!isWordOnlineHtml(html)) {
    return doc;
  }

  const TEXT_NODE = 3;
  const isTextNode = (node) => node.nodeType === TEXT_NODE;

  // Creates a wrapper element tagged with the given paragraph style.
  const createBlockWrapper = (paraStyle) => {
    const wrapper = doc.createElement("word-online-block");
    wrapper.setAttribute("data-parastyle", paraStyle);
    return wrapper;
  };

  // Puts `spaceText` in front of `textNode`, or inserts a new text node as the
  // container's first child when there is no text node to extend.
  const prependSpace = (container, textNode, spaceText) => {
    if (textNode) {
      textNode.textContent = spaceText + (textNode.textContent ?? "");
    } else {
      container.insertBefore(doc.createTextNode(spaceText), container.firstChild);
    }
  };

  // Keeps the space in the current run: appended to the run's first text
  // node, or added as a new trailing text node when the run has none.
  const keepSpaceInRun = (run, spaceText) => {
    const textNode = Array.from(run.childNodes).find(isTextNode);
    if (textNode) {
      textNode.textContent = (textNode.textContent ?? "") + spaceText;
    } else {
      run.appendChild(doc.createTextNode(spaceText));
    }
  };

  // Step 1: heading paragraphs become word-online-block wrappers.
  const headingParagraphs = Array.from(
    doc.querySelectorAll('p.Paragraph[role="heading"]'),
  );
  for (const paragraph of headingParagraphs) {
    const ariaLevel = paragraph.getAttribute("aria-level");
    if (!ariaLevel) {
      continue;
    }
    const wrapper = createBlockWrapper(`heading ${ariaLevel}`);
    const parent = paragraph.parentNode;
    if (!parent) {
      continue;
    }
    parent.insertBefore(wrapper, paragraph);
    while (paragraph.firstChild) {
      wrapper.appendChild(paragraph.firstChild);
    }
    parent.removeChild(paragraph);
  }

  // Step 2: group adjacent body-level spans that share a paragraph style.
  let current = doc.body.firstChild;
  while (current) {
    const following = current.nextSibling;
    const isSpanLike = isElement(current) && tagName(current)?.includes("span");
    const paraStyle = isSpanLike ? getParaStyle(current) : undefined;
    if (!paraStyle) {
      current = following;
      continue;
    }
    const group = [current];
    let sibling = following;
    while (sibling && isElement(sibling) && getParaStyle(sibling) === paraStyle) {
      group.push(sibling);
      sibling = sibling.nextSibling;
    }
    const wrapper = createBlockWrapper(paraStyle);
    doc.body.insertBefore(wrapper, current);
    for (const member of group) {
      wrapper.appendChild(member);
    }
    current = sibling;
  }

  // Step 3: fold whitespace-only nested spans into surrounding text.
  const textRunSpans = Array.from(doc.body.querySelectorAll("span")).filter(
    isTextRunSpan,
  );
  for (const textRunSpan of textRunSpans) {
    const normalTextRuns = Array.from(textRunSpan.childNodes).filter(
      isNormalTextRun,
    );
    for (const normalTextRun of normalTextRuns) {
      // Repeat until the run holds no more whitespace-only nested spans.
      for (;;) {
        const children = Array.from(normalTextRun.childNodes);
        const nestedSpanIndex = children.findIndex(
          (node) =>
            isElement(node) &&
            tagName(node) === "span" &&
            node.textContent.trim() === "",
        );
        if (nestedSpanIndex === -1) {
          break;
        }
        const nestedSpan = children.at(nestedSpanIndex);
        if (!nestedSpan) {
          break;
        }

        const spaceText = nestedSpan.textContent?.replace(/\u00a0/g, " ") ?? "";
        // "At the beginning" means no text node precedes the nested span.
        const isSpaceAtBeginning = !children
          .slice(0, nestedSpanIndex)
          .some(isTextNode);
        normalTextRun.removeChild(nestedSpan);

        if (isSpaceAtBeginning) {
          const firstTextNode = Array.from(normalTextRun.childNodes).find(
            isTextNode,
          );
          prependSpace(normalTextRun, firstTextNode, spaceText);
          continue;
        }

        const nextSibling = textRunSpan.nextSibling;
        const currentHasFormatting = hasFormatting(textRunSpan);
        const nextIsTextRun =
          nextSibling && isElement(nextSibling) && isTextRunSpan(nextSibling);
        const moveToNextRun =
          nextIsTextRun && currentHasFormatting && !hasFormatting(nextSibling);

        if (!moveToNextRun) {
          keepSpaceInRun(normalTextRun, spaceText);
          continue;
        }

        // A formatted run followed by an unformatted one: the space goes to
        // the start of the next run. When that run has no NormalTextRun
        // child, nothing is inserted.
        const nextNormalTextRun = Array.from(nextSibling.childNodes).find(
          isNormalTextRun,
        );
        if (nextNormalTextRun && isElement(nextNormalTextRun)) {
          const firstChild = nextNormalTextRun.firstChild;
          const leadingText =
            firstChild && firstChild.nodeType === TEXT_NODE
              ? firstChild
              : undefined;
          prependSpace(nextNormalTextRun, leadingText, spaceText);
        }
      }
    }
  }

  return doc;
}
269
/**
 * Resolves the Word paragraph style for an element.
 *
 * The element's own `data-ccp-parastyle` attribute wins. Otherwise, for a
 * span with class "TextRun", the style is taken from its `.NormalTextRun`
 * descendants, but only when the first one has a style and all of them agree.
 *
 * @param {Element} element - Element to inspect.
 * @returns {string | undefined} The style name, or undefined when none applies.
 */
function getParaStyle(element) {
  const STYLE_ATTRIBUTE = "data-ccp-parastyle";

  const ownStyle = element.getAttribute(STYLE_ATTRIBUTE);
  if (ownStyle) {
    return ownStyle;
  }

  if (tagName(element) !== "span" || !element.classList.contains("TextRun")) {
    return undefined;
  }

  const [firstRun, ...otherRuns] = Array.from(
    element.querySelectorAll(".NormalTextRun"),
  );
  if (!firstRun) {
    return undefined;
  }

  const sharedStyle = firstRun.getAttribute(STYLE_ATTRIBUTE);
  if (!sharedStyle) {
    return undefined;
  }

  const allAgree = otherRuns.every(
    (run) => run.getAttribute(STYLE_ATTRIBUTE) === sharedStyle,
  );
  return allAgree ? sharedStyle : undefined;
}
286
// Numeric result-type codes passed to `doc.evaluate` by the preprocessors in
// this file.
const _XPathResult = {
  BOOLEAN_TYPE: 3,
  ORDERED_NODE_ITERATOR_TYPE: 5,
  UNORDERED_NODE_SNAPSHOT_TYPE: 6,
};
291
/**
 * Marks up a parsed Google Docs document in place.
 *
 * Google Docs content is detected by an element whose id contains
 * "docs-internal-guid". When found, every element gets
 * `data-is-google-docs="true"`, direct children of the content root also get
 * `data-is-root-node="true"`, and an <img> that is the first child of an <li>
 * is removed. When the guid element is a <b> wrapper, it is treated as the
 * content root and the body's first element child is finally replaced by the
 * wrapper's children; otherwise the body is the content root.
 *
 * @param {string} _html - Unused raw HTML.
 * @param {Document} doc - Parsed document; mutated in place.
 * @returns {Document} The same `doc`.
 */
function preprocessGDocs(_html, doc) {
  const guidNode = doc
    .evaluate(
      '//*[@id and contains(@id, "docs-internal-guid")]',
      doc,
      null,
      _XPathResult.ORDERED_NODE_ITERATOR_TYPE,
      null,
    )
    .iterateNext();
  if (!guidNode) {
    return doc;
  }

  const isWrappedRootTag = tagName(guidNode) === "b";
  const rootNode = isWrappedRootTag ? guidNode : doc.body;

  const allElements = doc.evaluate(
    "//*",
    doc,
    null,
    _XPathResult.UNORDERED_NODE_SNAPSHOT_TYPE,
    null,
  );
  for (let i = allElements.snapshotLength - 1; i >= 0; i--) {
    const elm = allElements.snapshotItem(i);
    if (!elm) {
      // snapshotItem may return null; skip rather than dereference it.
      continue;
    }
    elm.setAttribute("data-is-google-docs", "true");
    // rootNode is the body when there is no <b> wrapper, so one comparison
    // covers both the wrapped and the unwrapped case.
    if (elm.parentElement === rootNode) {
      elm.setAttribute("data-is-root-node", "true");
    }
    if (
      tagName(elm) === "li" &&
      elm.firstChild &&
      tagName(elm.firstChild) === "img"
    ) {
      elm.removeChild(elm.firstChild);
    }
  }

  if (isWrappedRootTag) {
    doc.body.firstElementChild?.replaceWith(...Array.from(rootNode.childNodes));
  }
  return doc;
}
319
// XPath expressions for nodes that preprocessHTML strips: stray text nodes
// directly under html/head/body/ul/ol, comments, and non-content elements.
const unwantedWordDocumentPaths = [
  // Stray text nodes
  "/html/text()",
  "/html/head/text()",
  "/html/body/text()",
  "/html/body/ul/text()",
  "/html/body/ol/text()",
  // Comments
  "//comment()",
  // Non-content elements
  "//style",
  "//xml",
  "//script",
  "//meta",
  "//link",
];
332
/**
 * Generic HTML cleanup, applied in place.
 *
 * Text nodes directly under <body> are wrapped in a <span> when anything
 * remains after stripping trailing non-newline whitespace, and removed
 * otherwise. Every node matching `unwantedWordDocumentPaths` is then removed.
 *
 * @param {string} _html - Unused raw HTML.
 * @param {Document} doc - Parsed document; mutated in place.
 * @returns {Document} The same `doc`.
 */
function preprocessHTML(_html, doc) {
  const snapshot = (xpath) =>
    doc.evaluate(
      xpath,
      doc,
      null,
      _XPathResult.UNORDERED_NODE_SNAPSHOT_TYPE,
      null,
    );

  const bodyTextNodes = snapshot("/html/body/text()");
  for (let index = bodyTextNodes.snapshotLength - 1; index >= 0; index--) {
    const textNode = bodyTextNodes.snapshotItem(index);
    const text = textNode.textContent || "";
    const hasContent = text.replace(/[^\S\n]+$/g, "") !== "";
    if (!hasContent) {
      textNode.parentNode?.removeChild(textNode);
      continue;
    }
    const wrapper = doc.createElement("span");
    wrapper.appendChild(doc.createTextNode(text));
    textNode.parentNode?.replaceChild(wrapper, textNode);
  }

  const unwantedNodes = snapshot(unwantedWordDocumentPaths.join("|"));
  for (let index = unwantedNodes.snapshotLength - 1; index >= 0; index--) {
    const unwanted = unwantedNodes.snapshotItem(index);
    if (unwanted) {
      unwanted.parentNode?.removeChild(unwanted);
    }
  }

  return doc;
}
361
/**
 * Tags every element with `data-is-notion="true"` when the raw HTML contains
 * a `<!-- notionvc:... -->` comment; otherwise leaves the document untouched.
 *
 * @param {string} html - Raw HTML, used only for detection.
 * @param {Document} doc - Parsed document; mutated in place.
 * @returns {Document} The same `doc`.
 */
function preprocessNotion(html, doc) {
  const NOTION_REGEX = /<!-- notionvc:.*?-->/g;
  if (html.match(NOTION_REGEX) === null) {
    return doc;
  }

  const allElements = doc.evaluate(
    "//*",
    doc,
    null,
    _XPathResult.UNORDERED_NODE_SNAPSHOT_TYPE,
    null,
  );
  let index = allElements.snapshotLength;
  while (index-- > 0) {
    allElements.snapshotItem(index)?.setAttribute("data-is-notion", "true");
  }
  return doc;
}
377
// Lower-cased tag names of containers inside which preprocessWhitespace
// deletes whitespace-only text nodes outright instead of normalising them.
const BLOCK_CONTAINER_ELEMENTS = [
  "body",
  // Table structure
  "table",
  "tbody",
  "thead",
  "tfoot",
  "tr",
  // List containers
  "ul",
  "ol",
];
387
/**
 * Normalises whitespace in every text node under <body>, in place.
 *
 * Text nodes whose parent tag is listed in PRESERVE_WHITESPACE_TAGS are left
 * alone. For all others, runs of two or more whitespace characters and any
 * remaining line breaks collapse to a single space. A text node that is empty
 * after trimming and sits directly inside a block container (see
 * BLOCK_CONTAINER_ELEMENTS) is removed.
 *
 * @param {string} _ - Unused raw HTML.
 * @param {Document} doc - Parsed document; mutated in place.
 * @returns {Document} The same `doc`.
 */
function preprocessWhitespace(_, doc) {
  // DOM nodeType of a text node. Previously this compared against
  // _XPathResult.BOOLEAN_TYPE, an unrelated constant that merely shares the
  // value 3.
  const TEXT_NODE = 3;

  function processNode(node) {
    const isNormalisableText =
      node.nodeType === TEXT_NODE &&
      !PRESERVE_WHITESPACE_TAGS.includes(
        node.parentElement?.tagName.toLowerCase() || "",
      );

    if (!isNormalisableText) {
      // Walk backwards so removing a child does not disturb pending indexes.
      for (let i = node.childNodes.length - 1; i >= 0; i--) {
        const child = node.childNodes[i];
        if (child) {
          processNode(child);
        }
      }
      return;
    }

    const normalized =
      node.textContent?.replace(/\s\s+/g, " ").replace(/[\r\n]+/g, " ") || "";
    const parentTag = node.parentElement?.tagName.toLowerCase();
    const isDroppable =
      parentTag &&
      BLOCK_CONTAINER_ELEMENTS.includes(parentTag) &&
      normalized.trim() === "";
    if (isDroppable) {
      node.parentNode?.removeChild(node);
    } else {
      node.textContent = normalized;
    }
  }

  processNode(doc.body);
  return doc;
}
402
// Signatures of desktop Word HTML: Mso* classes, mso-* inline styles, the
// w:WordDocument marker, <o:...> tags, or a closing </font> tag.
const WORD_HTML_REGEX = /(class="?Mso|style=(?:"|')[^"]*?\bmso-|w:WordDocument|<o:\w+>|<\/font>)/;

// XPath expressions for Word-specific nodes that preprocessWord removes.
const unwantedPaths = [
  "//o:p",
  "//span[@style='mso-list:Ignore']",
  "//span[@style='mso-list: Ignore']",
];

// XPath expressions for Word-classed elements that preprocessWord replaces
// with plain HTML elements according to `elementMap`.
const mappedPaths = [
  "//p[@class='MsoTocHeading']",
  "//p[@class='MsoTitle']",
  "//p[@class='MsoToaHeading']",
  "//p[@class='MsoSubtitle']",
  "//span[@class='MsoSubtleEmphasis']",
  "//span[@class='MsoIntenseEmphasis']",
];

// Word class name -> chain of tag names, outermost first, that replaces an
// element with that class.
const elementMap = {
  MsoTocHeading: ["h3"],
  MsoTitle: ["h1"],
  MsoToaHeading: ["h2"],
  MsoSubtitle: ["h5"],
  MsoSubtleEmphasis: ["span", "em"],
  MsoIntenseEmphasis: ["span", "em", "strong"],
};

/**
 * Detects desktop Word markup in a raw HTML string.
 *
 * @param {string} html - Raw HTML source.
 * @returns {boolean} True when `html` matches WORD_HTML_REGEX.
 */
function isWordHtml(html) {
  const looksLikeWord = WORD_HTML_REGEX.test(html);
  return looksLikeWord;
}
425
/**
 * Collects Word `@list lN:levelM { ... }` rules from the document's <style>
 * elements and stores the result on <body>.
 *
 * Each rule becomes an entry keyed "lN:levelM" with value "bullet" when the
 * rule body declares `mso-level-number-format: bullet`, and "number"
 * otherwise. When at least one rule was found and the document has a body,
 * the map is written as JSON to the `data-word-list-styles` attribute.
 *
 * @param {Document} doc - Parsed document; its body may gain an attribute.
 * @returns {void}
 */
function extractListStyles(doc) {
  const listStyles = {};
  let ruleCount = 0;

  for (const styleEl of doc.querySelectorAll("style")) {
    const css = styleEl.textContent || "";
    const rules = css.matchAll(/@list\s+(l\d+):(level\d+)\s*\{([^}]*)\}/g);
    for (const rule of rules) {
      const listId = rule[1] || "";
      const level = rule[2] || "";
      const ruleBody = rule[3] || "";
      const isBullet = /mso-level-number-format\s*:\s*bullet/i.test(ruleBody);
      listStyles[`${listId}:${level}`] = isBullet ? "bullet" : "number";
      ruleCount += 1;
    }
  }

  if (ruleCount > 0 && doc.body) {
    doc.body.setAttribute("data-word-list-styles", JSON.stringify(listStyles));
  }
}
438
/**
 * Cleans up a parsed desktop-Word document in place.
 *
 * Does nothing unless `html` looks like Word HTML. Otherwise it records list
 * styles on the body, removes nodes matching `unwantedPaths`, and replaces
 * each element matching `mappedPaths` with the nested element chain that
 * `elementMap` gives for its class name, carrying over only the text content.
 *
 * @param {string} html - Raw HTML, used only for detection.
 * @param {Document} doc - Parsed document; mutated in place.
 * @returns {Document} The same `doc`.
 */
function preprocessWord(html, doc) {
  if (!isWordHtml(html)) {
    return doc;
  }
  extractListStyles(doc);

  const unwantedNodes = doc.evaluate(
    unwantedPaths.join("|"),
    doc,
    // Resolve the `o:` prefix used by the "//o:p" path.
    (prefix) => (prefix === "o" ? "urn:schemas-microsoft-com:office:office" : null),
    _XPathResult.UNORDERED_NODE_SNAPSHOT_TYPE,
    null,
  );
  for (let i = unwantedNodes.snapshotLength - 1; i >= 0; i--) {
    const unwanted = unwantedNodes.snapshotItem(i);
    if (unwanted?.parentNode) {
      unwanted.parentNode.removeChild(unwanted);
    }
  }

  const mappedElements = doc.evaluate(
    mappedPaths.join("|"),
    doc,
    null,
    _XPathResult.UNORDERED_NODE_SNAPSHOT_TYPE,
    null,
  );
  for (let i = mappedElements.snapshotLength - 1; i >= 0; i--) {
    const mappedElm = mappedElements.snapshotItem(i);
    if (!mappedElm) {
      // snapshotItem may return null; skip rather than dereference it.
      continue;
    }
    const tags = elementMap[mappedElm.className];
    const firstTag = tags?.[0];
    if (!firstTag) {
      continue;
    }

    // Build the chain outermost-first; the text lands in the innermost element.
    const outermost = doc.createElement(firstTag);
    let innermost = outermost;
    for (const tag of tags.slice(1)) {
      const nested = doc.createElement(tag);
      innermost.appendChild(nested);
      innermost = nested;
    }
    innermost.appendChild(doc.createTextNode(mappedElm.textContent || ""));
    mappedElm.parentNode?.replaceChild(outermost, mappedElm);
  }

  return doc;
}
475
// Document preprocessors, listed in this fixed order. Each one has the
// signature (html, doc) and returns the document.
const preprocessors = [
  preprocessWhitespace,
  // Source-specific passes
  preprocessNotion,
  preprocessWord,
  preprocessWordOnline,
  preprocessGDocs,
  // Generic cleanup
  preprocessHTML,
];
483
+ function keyGenerator() {
484
+ return randomKey(12);
485
+ }
486
+ function whatwgRNG(length = 16) {
487
+ const rnds8 = new Uint8Array(length);
488
+ return crypto.getRandomValues(rnds8), rnds8;
489
+ }
490
+ const byteToHex = [];
491
+ for (let i = 0; i < 256; ++i)
492
+ byteToHex[i] = (i + 256).toString(16).slice(1);
493
+ function randomKey(length) {
494
+ return whatwgRNG(length).reduce((str, n) => str + byteToHex[n], "").slice(0, length);
495
+ }
496
/**
 * Maps a Word paragraph style name to a block style enabled in the schema.
 *
 * "heading 1".."heading 6" map to "h1".."h6" and "Quote" maps to
 * "blockquote"; every other name maps to "normal".
 *
 * @param {{styles: Array<{name: string}>}} schema - Schema whose `styles`
 *   list decides which block styles are available.
 * @param {string} paraStyle - Word paragraph style name.
 * @returns {string | undefined} The mapped style name when the schema defines
 *   it, otherwise undefined.
 */
function mapParaStyleToBlockStyle(schema, paraStyle) {
  const styleByParaStyle = {
    "heading 1": "h1",
    "heading 2": "h2",
    "heading 3": "h3",
    "heading 4": "h4",
    "heading 5": "h5",
    "heading 6": "h6",
    Quote: "blockquote",
  };
  // Own-property check: paraStyle comes from pasted markup, and names such as
  // "toString" or "constructor" must fall back to "normal" instead of
  // resolving to inherited Object.prototype members.
  const blockStyle = Object.hasOwn(styleByParaStyle, paraStyle)
    ? styleByParaStyle[paraStyle]
    : "normal";
  return schema.styles.find((style) => style.name === blockStyle)?.name;
}
508
/**
 * Builds the deserialization rules for Word Online markup.
 *
 * Rules, in order: bare WACImage <img>, WACImageContainer wrapper, <li> with
 * a data-aria-level attribute, elements carrying data-parastyle, and TextRun
 * spans. Each rule returns undefined when it does not apply.
 *
 * @param {object} schema - Compiled schema (`styles`, `block.name` are read).
 * @param {object} options - Options; `matchers.image`, `matchers.inlineImage`
 *   and `keyGenerator` are read when present.
 * @returns {Array<{deserialize: Function}>} The rule list.
 */
function createWordOnlineRules(schema, options) {
  // Normalises `className`, which may be a string or an object with `baseVal`.
  const classNameOf = (el) => {
    const raw = el.className;
    if (typeof raw === "string") {
      return raw;
    }
    if (raw && typeof raw === "object") {
      return raw.baseVal || "";
    }
    return "";
  };

  // Reads src, alt and the full attribute map from an <img>.
  const readImageAttributes = (img) => ({
    src: img.getAttribute("src") ?? void 0,
    alt: img.getAttribute("alt") ?? void 0,
    props: Object.fromEntries(
      Array.from(img.attributes).map((attr) => [attr.name, attr.value]),
    ),
  });

  // Builds a fresh matcher argument on every call.
  const toMatcherInput = ({ src, alt, props }) => ({
    context: {
      schema,
      keyGenerator: options.keyGenerator ?? keyGenerator,
    },
    props: {
      ...props,
      ...(src ? { src } : {}),
      ...(alt ? { alt } : {}),
    },
  });

  // Wraps a matched image as a block-level result.
  const asBlock = (image) => ({
    _type: "__block",
    block: image,
  });

  // Image rule - handles bare Word Online <img> tags with WACImage class
  const bareImageRule = {
    deserialize(el) {
      if (!isElement(el) || tagName(el) !== "img") {
        return;
      }
      if (!classNameOf(el).includes("WACImage")) {
        return;
      }
      const attributes = readImageAttributes(el);
      const image = options.matchers?.image?.(toMatcherInput(attributes));
      if (image) {
        return asBlock(image);
      }
    },
  };

  // Image rule - handles Word Online images wrapped in WACImageContainer
  const imageContainerRule = {
    deserialize(el) {
      if (!isElement(el)) {
        return;
      }
      if (!classNameOf(el).includes("WACImageContainer")) {
        return;
      }
      const img = el.querySelector("img");
      if (!img) {
        return;
      }
      const attributes = readImageAttributes(img);
      const isInsideListItem = el.closest("li") !== null;
      // Outside any <p>, or inside a list item, try the inline matcher first.
      if (el.closest("p") === null || isInsideListItem) {
        const inlineImage = options.matchers?.inlineImage?.(
          toMatcherInput(attributes),
        );
        if (inlineImage) {
          return inlineImage;
        }
      }
      const image = options.matchers?.image?.(toMatcherInput(attributes));
      if (image) {
        return asBlock(image);
      }
    },
  };

  // List item rule - handles <li> elements with aria-level
  const listItemRule = {
    deserialize(el, next) {
      if (!isElement(el) || tagName(el) !== "li") {
        return;
      }
      const ariaLevel = el.getAttribute("data-aria-level");
      if (!ariaLevel) {
        return;
      }
      const listItem = tagName(el.parentNode) === "ol" ? "number" : "bullet";

      let childNodesToProcess = el.childNodes;
      let blockStyle = "normal";
      // A single block-like child is unwrapped so its children become the
      // list item's children.
      const onlyChild = el.childNodes.length === 1 ? el.firstChild : null;
      if (onlyChild && isElement(onlyChild)) {
        const childTag = tagName(onlyChild);
        const isBlockLike =
          childTag &&
          (HTML_BLOCK_TAGS[childTag] ||
            HTML_HEADER_TAGS[childTag] ||
            childTag === "word-online-block");
        if (isBlockLike) {
          if (childTag === "word-online-block") {
            const paraStyle = onlyChild.getAttribute("data-parastyle");
            const mappedStyle = paraStyle
              ? mapParaStyleToBlockStyle(schema, paraStyle)
              : void 0;
            if (mappedStyle) {
              blockStyle = mappedStyle;
            }
          }
          childNodesToProcess = onlyChild.childNodes;
        }
      }

      const children = next(childNodesToProcess);
      let childArray = Array.isArray(children)
        ? children
        : [children].filter(Boolean);

      // Drop trailing whitespace-only text children, then trim the end of the
      // last remaining one.
      while (childArray.length > 0) {
        const lastChild = childArray[childArray.length - 1];
        const hasText =
          lastChild && typeof lastChild === "object" && "text" in lastChild;
        if (!hasText) {
          break;
        }
        const trimmed = lastChild.text.trimEnd();
        if (trimmed !== "") {
          if (trimmed !== lastChild.text) {
            lastChild.text = trimmed;
          }
          break;
        }
        childArray = childArray.slice(0, -1);
      }

      return {
        _type: schema.block.name,
        children: childArray,
        markDefs: [],
        style: blockStyle,
        listItem,
        level: parseInt(ariaLevel, 10),
      };
    },
  };

  // Block style rule - handles paragraph styles like Quote
  // The preprocessor wraps grouped NormalTextRun spans in a word-online-block element
  const blockStyleRule = {
    deserialize(el, next) {
      if (!isElement(el)) {
        return;
      }
      const paraStyle = el.getAttribute("data-parastyle");
      const blockStyle = paraStyle
        ? mapParaStyleToBlockStyle(schema, paraStyle)
        : void 0;
      if (!blockStyle) {
        return;
      }
      const children = next(el.childNodes);
      let childList = [];
      if (Array.isArray(children)) {
        childList = children;
      } else if (children) {
        childList = [children];
      }
      return {
        _type: schema.block.name,
        style: blockStyle,
        markDefs: [],
        children: childList,
      };
    },
  };

  // TextRun rule
  const textRunRule = {
    deserialize(el) {
      if (!isWordOnlineTextRun(el)) {
        return;
      }
      if (!isElement(el) || !el.textContent) {
        return;
      }
      // Only NormalTextRun and FindHit children contribute text.
      const text = Array.from(el.childNodes)
        .filter((node) => isNormalTextRun(node) || isFindHit(node))
        .map((run) => (isElement(run) ? (run.textContent ?? "") : ""))
        .join("");
      if (!text) {
        return;
      }

      const span = {
        ...DEFAULT_SPAN,
        marks: [],
        text,
      };
      if (hasStrongFormatting(el)) {
        span.marks.push("strong");
      }
      if (hasEmphasisFormatting(el) && !isInHeading(el) && !isInBlockquote(el)) {
        span.marks.push("em");
      }
      if (hasUnderlineFormatting(el)) {
        const linkElement =
          isElement(el) && el.parentElement && tagName(el.parentElement) === "a"
            ? el.parentElement
            : null;
        if (linkElement) {
          // Inside a link, underline counts only when a sibling of the link
          // is underlined too.
          const { previousSibling, nextSibling } = linkElement;
          const hasPrevUnderline =
            previousSibling &&
            isElement(previousSibling) &&
            hasUnderlineFormatting(previousSibling);
          const hasNextUnderline =
            nextSibling &&
            isElement(nextSibling) &&
            hasUnderlineFormatting(nextSibling);
          if (hasPrevUnderline || hasNextUnderline) {
            span.marks.push("underline");
          }
        } else {
          span.marks.push("underline");
        }
      }
      if (hasStrikethroughFormatting(el)) {
        span.marks.push("strike-through");
      }
      return span;
    },
  };

  return [
    bareImageRule,
    imageContainerRule,
    listItemRule,
    blockStyleRule,
    textRunRule,
  ];
}
682
// Tag names of list containers: the keys of HTML_LIST_CONTAINER_TAGS.
const LIST_CONTAINER_TAGS = Object.keys(HTML_LIST_CONTAINER_TAGS);
683
/**
 * Reports whether an element's inline style declares `font-style: italic`.
 *
 * @param {*} el - Candidate node; non-elements yield false.
 * @returns {boolean}
 */
function isEmphasis$1(el) {
  const inlineStyle = isElement(el) ? el.getAttribute("style") : "";
  return /font-style\s*:\s*italic/.test(inlineStyle || "");
}
687
/**
 * Reports whether an element's inline style declares `font-weight: 700`.
 *
 * @param {*} el - Candidate node; non-elements yield false.
 * @returns {boolean}
 */
function isStrong$1(el) {
  const inlineStyle = isElement(el) ? el.getAttribute("style") : "";
  return /font-weight\s*:\s*700/.test(inlineStyle || "");
}
691
/**
 * Reports whether an element's inline style declares
 * `text-decoration: underline`. Always false for non-elements and for
 * elements whose parent is an <a>.
 *
 * @param {*} el - Candidate node.
 * @returns {boolean}
 */
function isUnderline$1(el) {
  if (!isElement(el)) {
    return false;
  }
  if (tagName(el.parentNode) === "a") {
    return false;
  }
  const inlineStyle = el.getAttribute("style");
  return /text-decoration\s*:\s*underline/.test(inlineStyle || "");
}
697
/**
 * Reports whether an element's inline `text-decoration` declaration includes
 * `line-through`.
 *
 * The match is confined to the text-decoration declaration itself: it does
 * not need a trailing semicolon, and a `line-through` appearing in a later,
 * unrelated declaration is not counted.
 *
 * @param {*} el - Candidate node; non-elements yield false.
 * @returns {boolean}
 */
function isStrikethrough(el) {
  const style = isElement(el) && el.getAttribute("style");
  // [^;]* keeps the search inside the text-decoration declaration.
  return /text-decoration\s*:\s*[^;]*line-through/.test(style || "");
}
701
/**
 * Reports whether the node is an element with a non-empty
 * `data-is-google-docs` attribute.
 *
 * @param {*} el - Candidate node.
 * @returns {boolean}
 */
function isGoogleDocs(el) {
  return isElement(el) && Boolean(el.getAttribute("data-is-google-docs"));
}
704
/**
 * Reports whether the node is an element with a non-empty
 * `data-is-root-node` attribute.
 *
 * @param {*} el - Candidate node.
 * @returns {boolean}
 */
function isRootNode(el) {
  return isElement(el) && Boolean(el.getAttribute("data-is-root-node"));
}
707
/**
 * Derives the list item kind from the parent's tag.
 *
 * @param {Node} el - List item node.
 * @returns {"bullet" | "number" | undefined} "bullet" for a <ul> parent,
 *   "number" for any other list container or when the parent has no tag
 *   name, and undefined when the parent has a tag that is not a list
 *   container.
 */
function getListItemStyle$1(el) {
  const parentTag = tagName(el.parentNode);
  const parentIsNotAList =
    Boolean(parentTag) && !LIST_CONTAINER_TAGS.includes(parentTag);
  if (parentIsNotAList) {
    return undefined;
  }
  return parentTag === "ul" ? "bullet" : "number";
}
712
/**
 * Computes list nesting depth.
 *
 * @param {Node} el - Candidate node.
 * @returns {number} For an <li>, the number of ancestors that are list
 *   containers; for anything else, 1.
 */
function getListItemLevel$1(el) {
  if (tagName(el) !== "li") {
    return 1;
  }
  let level = 0;
  for (let ancestor = el.parentNode; ancestor; ancestor = ancestor.parentNode) {
    const ancestorTag = tagName(ancestor);
    if (ancestorTag && LIST_CONTAINER_TAGS.includes(ancestorTag)) {
      level += 1;
    }
  }
  return level;
}
724
// Tag-name lookup merging HTML_BLOCK_TAGS and HTML_HEADER_TAGS; header
// entries win on a key collision because they are spread last.
const blocks = {
  ...HTML_BLOCK_TAGS,
  ...HTML_HEADER_TAGS,
};
728
/**
 * Picks the block style for an element from its first child's tag.
 *
 * @param {{styles: Array<{name: string}>}} schema - Schema whose `styles`
 *   list decides which styles are enabled.
 * @param {Node} el - Element whose first child is inspected.
 * @returns {string} The style of the matching `blocks` entry when the schema
 *   enables it, otherwise BLOCK_DEFAULT_STYLE.
 */
function getBlockStyle(schema, el) {
  const childTag = tagName(el.firstChild);
  const block = childTag && blocks[childTag];
  if (!block) {
    return BLOCK_DEFAULT_STYLE;
  }
  const isEnabled = schema.styles.some((style) => style.name === block.style);
  return isEnabled ? block.style : BLOCK_DEFAULT_STYLE;
}
732
/**
 * Builds the deserialization rules for Google Docs markup.
 *
 * Rules, in order: Google Docs spans (mapped to a span with marks), Google
 * Docs list items, and Google Docs <br> elements that should become empty
 * spans. Each rule returns undefined when it does not apply.
 *
 * @param {object} schema - Compiled schema, used to resolve block styles.
 * @returns {Array<{deserialize: Function}>} The rule list.
 */
function createGDocsRules(schema) {
  // Mark detectors, applied in this order.
  const markDetectors = [
    [isStrong$1, "strong"],
    [isUnderline$1, "underline"],
    [isStrikethrough, "strike-through"],
    [isEmphasis$1, "em"],
  ];

  const spanRule = {
    deserialize(el, next) {
      if (!isElement(el) || tagName(el) !== "span" || !isGoogleDocs(el)) {
        return;
      }
      if (!el.textContent) {
        // An empty span with no siblings is flagged before descending.
        if (!el.previousSibling && !el.nextSibling) {
          el.setAttribute("data-lonely-child", "true");
        }
        return next(el.childNodes);
      }
      const span = {
        ...DEFAULT_SPAN,
        marks: [],
        text: el.textContent,
      };
      for (const [detect, mark] of markDetectors) {
        if (detect(el)) {
          span.marks.push(mark);
        }
      }
      return span;
    },
  };

  const listItemRule = {
    deserialize(el, next) {
      if (tagName(el) !== "li" || !isGoogleDocs(el)) {
        return;
      }
      return {
        ...DEFAULT_BLOCK,
        listItem: getListItemStyle$1(el),
        level: getListItemLevel$1(el),
        style: getBlockStyle(schema, el),
        children: next(el.firstChild?.childNodes || []),
      };
    },
  };

  const lineBreakRule = {
    deserialize(el) {
      if (tagName(el) !== "br" || !isGoogleDocs(el) || !isElement(el)) {
        return;
      }
      // An interchange newline, a <br> in a parent with no text, or a
      // root-level <br> all become an empty span.
      const becomesEmptySpan =
        el.classList.contains("apple-interchange-newline") ||
        el?.parentNode?.textContent === "" ||
        isRootNode(el);
      if (becomesEmptySpan) {
        return {
          ...DEFAULT_SPAN,
          text: "",
        };
      }
    },
  };

  return [spanRule, listItemRule, lineBreakRule];
}
781
// Rule for "#text" nodes accepted by isWhitespaceTextNode: yields a span
// whose text has runs of two or more whitespace characters collapsed to a
// single space.
const whitespaceTextNodeRule = {
  deserialize(node) {
    if (node.nodeName !== "#text" || !isWhitespaceTextNode(node)) {
      return undefined;
    }
    const text = (node.textContent ?? "").replace(/\s\s+/g, " ");
    return {
      ...DEFAULT_SPAN,
      marks: [],
      text,
    };
  },
};
790
/**
 * Decides whether a text node should be emitted by whitespaceTextNodeRule.
 *
 * A node qualifies when its text is anything other than a single space, or
 * when it collapses to a single space and sits between two non-text siblings
 * (either directly, or as the only child of a <span> that itself sits
 * between two non-text siblings). Nodes directly under <body> never qualify.
 *
 * @param {Node} node - Text node to test.
 * @returns {boolean}
 */
function isWhitespaceTextNode(node) {
  const TEXT_NODE = 3;
  const isNonTextNode = (candidate) =>
    Boolean(candidate) && candidate.nodeType !== TEXT_NODE;

  const collapsed = (node.textContent || "")
    .replace(/[\r\n]/g, " ")
    .replace(/\s\s+/g, " ");
  const isWhitespaceOnly = node.nodeType === TEXT_NODE && collapsed === " ";

  const hasSiblingContext =
    isNonTextNode(node.nextSibling) && isNonTextNode(node.previousSibling);

  const parent = node.parentNode;
  const hasParentSiblingContext =
    Boolean(parent) &&
    tagName(parent) === "span" &&
    !node.nextSibling &&
    !node.previousSibling &&
    isNonTextNode(parent.previousSibling) &&
    isNonTextNode(parent.nextSibling);

  const isMeaningfulWhitespace =
    isWhitespaceOnly && (hasSiblingContext || hasParentSiblingContext);
  if (!isMeaningfulWhitespace && node.textContent === " ") {
    return false;
  }
  return tagName(node.parentNode) !== "body";
}
794
/**
 * Maps a list container tag to the list kind, if the schema enables it.
 *
 * @param {{lists: Array<{name: string}>}} schema - Schema whose `lists`
 *   decide which kinds are enabled.
 * @param {string} listNodeTagName - "ul" or "ol".
 * @returns {"bullet" | "number" | undefined} "bullet" for "ul", "number" for
 *   "ol", or undefined when the tag is neither or the kind is not enabled.
 */
function resolveListItem(schema, listNodeTagName) {
  const isEnabled = (kind) => schema.lists.some((list) => list.name === kind);
  switch (listNodeTagName) {
    case "ul":
      return isEnabled("bullet") ? "bullet" : undefined;
    case "ol":
      return isEnabled("number") ? "number" : undefined;
    default:
      return undefined;
  }
}
800
/**
 * Build the generic HTML deserialization rules.
 *
 * Each rule exposes `deserialize(el, next, block)` and returns `undefined`
 * when it does not apply to `el`, so that the next rule can be tried.
 *
 * @param schema - Schema used to decide which styles, lists, decorators and
 *   annotations are enabled
 * @param options - `keyGenerator` (optional) and `matchers` (optional
 *   `image` / `inlineImage` callbacks)
 * @returns Ordered array of rules; the order is significant
 */
function createHTMLRules(schema, options) {
  // Block-level tag tables are constant, so they are merged once per rule set
  // instead of once per visited element.
  const blockTags = {
    ...HTML_BLOCK_TAGS,
    ...HTML_HEADER_TAGS
  };
  const nonBlockquoteBlocks = Object.keys(blockTags).filter(
    (tag) => tag !== "blockquote"
  );

  const preRule = {
    // Pre element: the whole text content becomes a single span.
    deserialize(el) {
      if (tagName(el) !== "pre") {
        return undefined;
      }
      const isCodeEnabled = schema.styles.some(
        (style) => style.name === "code"
      );
      return {
        _type: "block",
        style: "normal",
        markDefs: [],
        children: [
          {
            ...DEFAULT_SPAN,
            marks: isCodeEnabled ? ["code"] : [],
            text: el.textContent || ""
          }
        ]
      };
    }
  };

  const blockquoteRule = {
    // Blockquote element: nested block elements are flattened into spans
    // separated by "\r" markers.
    deserialize(el, next) {
      if (tagName(el) !== "blockquote") {
        return undefined;
      }
      const children = [];
      el.childNodes.forEach((node, index) => {
        const doc = el.ownerDocument;
        // Without an owner document no replacement nodes can be created;
        // the node is dropped, as before.
        if (!doc) {
          return;
        }
        const isNestedBlock = node.nodeType === 1 && nonBlockquoteBlocks.includes(node.localName.toLowerCase());
        if (!isNestedBlock) {
          children.push(node);
          return;
        }
        const span = doc.createElement("span");
        const previousChild = children[children.length - 1];
        if (previousChild && previousChild.nodeType === 3 && previousChild.textContent?.trim()) {
          span.appendChild(doc.createTextNode("\r"));
        }
        node.childNodes.forEach((cn) => {
          span.appendChild(cn.cloneNode(true));
        });
        // NOTE(review): `index` never equals `childNodes.length`, so this is
        // always true and the trailing marker is always appended. Kept as-is
        // to preserve existing output.
        if (index !== el.childNodes.length) {
          span.appendChild(doc.createTextNode("\r"));
        }
        children.push(span);
      });
      return {
        _type: "block",
        style: "blockquote",
        markDefs: [],
        children: next(children)
      };
    }
  };

  const blockRule = {
    // Block elements (paragraphs, headings, ...).
    deserialize(el, next) {
      const tag = tagName(el);
      const definition = tag ? blockTags[tag] : undefined;
      if (!definition) {
        return undefined;
      }
      // Blocks directly inside a list item are unwrapped.
      if (el.parentNode && tagName(el.parentNode) === "li") {
        return next(el.childNodes);
      }
      const isStyleEnabled = schema.styles.some(
        (style) => style.name === definition.style
      );
      return {
        ...(isStyleEnabled ? definition : DEFAULT_BLOCK),
        children: next(el.childNodes)
      };
    }
  };

  const spanRule = {
    // Ignore span tags: only their children are deserialized.
    deserialize(el, next) {
      const tag = tagName(el);
      if (tag && tag in HTML_SPAN_TAGS) {
        return next(el.childNodes);
      }
      return undefined;
    }
  };

  const divRule = {
    // Ignore div tags: only their children are deserialized.
    deserialize(el, next) {
      if (tagName(el) === "div") {
        return next(el.childNodes);
      }
      return undefined;
    }
  };

  const listContainerRule = {
    // Ignore list containers: only their children are deserialized.
    deserialize(el, next) {
      const tag = tagName(el);
      if (tag && tag in HTML_LIST_CONTAINER_TAGS) {
        return next(el.childNodes);
      }
      return undefined;
    }
  };

  const lineBreakRule = {
    // A <br> becomes a span holding a newline.
    deserialize(el) {
      if (tagName(el) !== "br") {
        return undefined;
      }
      return {
        ...DEFAULT_SPAN,
        text: "\n"
      };
    }
  };

  const listItemRule = {
    // List items.
    deserialize(el, next, block) {
      const tag = tagName(el);
      const listItem = tag ? HTML_LIST_ITEM_TAGS[tag] : undefined;
      if (!listItem) {
        return undefined;
      }
      const parentTag = tagName(el.parentNode) || "";
      const listTag = HTML_LIST_CONTAINER_TAGS[parentTag] ? parentTag : "ul";
      const enabledListItem = resolveListItem(schema, listTag);
      if (!enabledListItem) {
        return block({ _type: "block", children: next(el.childNodes) });
      }
      // Override `listItem` on a copy: the entry in HTML_LIST_ITEM_TAGS is
      // shared, so writing to it would leak state between elements.
      return {
        ...listItem,
        listItem: enabledListItem,
        children: next(el.childNodes)
      };
    }
  };

  const decoratorRule = {
    // Decorators - a limited set of known HTML elements, applied only when
    // the schema declares the matching decorator.
    deserialize(el, next) {
      const decorator = HTML_DECORATOR_TAGS[tagName(el) || ""];
      const isEnabled = Boolean(decorator) && schema.decorators.some(
        (decoratorType) => decoratorType.name === decorator
      );
      if (!isEnabled) {
        return undefined;
      }
      return {
        _type: "__decorator",
        name: decorator,
        children: next(el.childNodes)
      };
    }
  };

  const linkRule = {
    // Hyperlinks: add a link annotation when the schema allows it, otherwise
    // write the link text followed by the href in plain text.
    deserialize(el, next) {
      if (tagName(el) !== "a") {
        return undefined;
      }
      const href = isElement(el) && el.getAttribute("href");
      if (!href) {
        return next(el.childNodes);
      }
      const linkEnabled = schema.annotations.some(
        (annotation) => annotation.name === "link"
      );
      if (linkEnabled) {
        return {
          _type: "__annotation",
          markDef: {
            _key: options.keyGenerator ? options.keyGenerator() : keyGenerator(),
            _type: "link",
            href
          },
          children: next(el.childNodes)
        };
      }
      el.appendChild(el.ownerDocument.createTextNode(` (${href})`));
      return next(el.childNodes);
    }
  };

  const tableCellRule = {
    // Table cells become default blocks.
    deserialize(el, next) {
      if (isElement(el) && (tagName(el) === "td" || tagName(el) === "th")) {
        return {
          ...DEFAULT_BLOCK,
          children: next(el.childNodes)
        };
      }
      return undefined;
    }
  };

  const imageRule = {
    // Images are delegated to the optional `image` / `inlineImage` matchers.
    deserialize(el) {
      if (!isElement(el) || tagName(el) !== "img") {
        return undefined;
      }
      const src = el.getAttribute("src") ?? undefined;
      const alt = el.getAttribute("alt") ?? undefined;
      const attributes = Object.fromEntries(
        Array.from(el.attributes).map((attr) => [attr.name, attr.value])
      );
      // A fresh input object per matcher call, so a matcher that mutates its
      // argument cannot affect a later call.
      const buildMatcherInput = () => ({
        context: {
          schema,
          keyGenerator: options.keyGenerator ?? keyGenerator
        },
        props: {
          ...attributes,
          ...(src ? { src } : {}),
          ...(alt ? { alt } : {})
        }
      });
      const asBlock = (image) => ({
        _type: "__block",
        block: image
      });
      const ancestorOfLonelyChild = el.parentElement?.parentElement?.getAttribute("data-lonely-child");
      const ancestorOfListItem = el.closest("li") !== null;
      // An image whose grandparent is flagged `data-lonely-child` (and that
      // is not inside a list item) is tried as a block image first.
      if (ancestorOfLonelyChild && !ancestorOfListItem) {
        const lonelyImage = options.matchers?.image?.(buildMatcherInput());
        if (lonelyImage) {
          return asBlock(lonelyImage);
        }
      }
      const inlineImage = options.matchers?.inlineImage?.(buildMatcherInput());
      if (inlineImage) {
        return inlineImage;
      }
      const image = options.matchers?.image?.(buildMatcherInput());
      if (image) {
        return asBlock(image);
      }
      return undefined;
    }
  };

  return [
    whitespaceTextNodeRule,
    preRule,
    blockquoteRule,
    blockRule,
    spanRule,
    divRule,
    listContainerRule,
    lineBreakRule,
    listItemRule,
    decoratorRule,
    linkRule,
    tableCellRule,
    imageRule
  ];
}
1022
/**
 * Tell whether an element's inline `style` attribute contains
 * `font-style:italic` (no whitespace after the colon).
 *
 * @param el - Node to inspect; anything `isElement` rejects yields `false`
 * @returns `true` when the inline style declares italic text
 */
function isEmphasis(el) {
  let inlineStyle = "";
  if (isElement(el)) {
    inlineStyle = el.getAttribute("style") || "";
  }
  return /font-style:italic/.test(inlineStyle);
}
1026
/**
 * Tell whether an element's inline `style` attribute declares a bold weight,
 * i.e. contains `font-weight:700` or `font-weight:600`.
 *
 * @param el - Node to inspect; anything `isElement` rejects yields `false`
 * @returns `true` when the inline style declares weight 700 or 600
 */
function isStrong(el) {
  let inlineStyle = "";
  if (isElement(el)) {
    inlineStyle = el.getAttribute("style") || "";
  }
  return /font-weight:(?:700|600)/.test(inlineStyle);
}
1030
/**
 * Tell whether an element's inline `style` attribute contains
 * `text-decoration:underline` (no whitespace after the colon).
 *
 * @param el - Node to inspect; anything `isElement` rejects yields `false`
 * @returns `true` when the inline style declares underlined text
 */
function isUnderline(el) {
  let inlineStyle = "";
  if (isElement(el)) {
    inlineStyle = el.getAttribute("style") || "";
  }
  return /text-decoration:underline/.test(inlineStyle);
}
1034
/**
 * Tell whether an element carries a non-empty `data-is-notion` attribute.
 *
 * @param el - Node to inspect
 * @returns The falsy result of `isElement` when `el` is not an element,
 *   otherwise `true`/`false` depending on the attribute being non-empty
 */
function isNotion(el) {
  const elementCheck = isElement(el);
  if (!elementCheck) {
    return elementCheck;
  }
  return Boolean(el.getAttribute("data-is-notion"));
}
1037
/**
 * Build the rules for Notion-flagged HTML.
 *
 * The single rule turns a `<span>` accepted by `isNotion` into a span whose
 * marks are derived from its inline style: "strong", "underline" and "em",
 * in that order.
 *
 * @returns Array containing the Notion span rule
 */
function createNotionRules() {
  const notionSpanRule = {
    deserialize(el) {
      const isNotionSpan = isElement(el) && tagName(el) === "span" && isNotion(el);
      if (!isNotionSpan) {
        return undefined;
      }
      const span = {
        ...DEFAULT_SPAN,
        marks: [],
        text: el.textContent
      };
      if (isStrong(el)) {
        span.marks.push("strong");
      }
      if (isUnderline(el)) {
        span.marks.push("underline");
      }
      if (isEmphasis(el)) {
        span.marks.push("em");
      }
      return span;
    }
  };
  return [notionSpanRule];
}
1053
/**
 * Read the list style map stored as JSON in the `data-word-list-styles`
 * attribute of the element's `<body>`.
 *
 * @param el - Node whose closest `body` (or owner document body) is inspected
 * @returns The parsed map, or an empty object when `el` is not an element,
 *   no body or attribute is found, the JSON is invalid, or the JSON is not a
 *   plain object
 */
function getListStyleMap(el) {
  if (!isElement(el)) {
    return {};
  }
  const body = el.closest("body") || el.ownerDocument?.body;
  if (!body) {
    return {};
  }
  const data = body.getAttribute("data-word-list-styles");
  if (!data) {
    return {};
  }
  try {
    const parsed = JSON.parse(data);
    // Valid JSON is not necessarily an object: `null` would make callers
    // that index the result throw, so only plain objects are passed through.
    const isPlainObject = parsed !== null && typeof parsed === "object" && !Array.isArray(parsed);
    return isPlainObject ? parsed : {};
  } catch {
    // Malformed attribute content is treated as "no styles recorded".
    return {};
  }
}
1068
/**
 * Resolve the list type of an element from its `mso-list` inline style.
 *
 * The `l<N>` and `level<N>` parts of the declaration form the lookup key
 * `"l<N>:level<N>"` into the map returned by `getListStyleMap`.
 *
 * @param el - Node to inspect
 * @returns The mapped list type, `"bullet"` when the key has no truthy entry,
 *   or `undefined` when there is no matching `mso-list` declaration
 */
function getListItemStyle(el) {
  const inlineStyle = isElement(el) && el.getAttribute("style");
  if (!inlineStyle) {
    return undefined;
  }
  const declaration = inlineStyle.match(/mso-list:\s*(l\d+)\s+(level\d+)\s+lfo\d+/);
  if (!declaration) {
    return undefined;
  }
  const [, listId, levelId] = declaration;
  const styleMap = getListStyleMap(el);
  return styleMap[`${listId}:${levelId}`] || "bullet";
}
1078
/**
 * Extract the list nesting level from a `level<N>` token in the element's
 * inline style.
 *
 * @param el - Node to inspect
 * @returns The level as a number (a parsed value of 0 falls back to 1), or
 *   `undefined` when there is no inline style or no `level<N>` token
 */
function getListItemLevel(el) {
  const style = isElement(el) && el.getAttribute("style");
  if (!style) {
    return undefined;
  }
  // Capture every digit of the level; matching a single digit would read
  // "level10" as level 1.
  const levelMatch = /level(\d+)/.exec(style);
  if (!levelMatch) {
    return undefined;
  }
  return Number.parseInt(levelMatch[1], 10) || 1;
}
1088
/**
 * Tell whether an element looks like a Word list paragraph.
 *
 * An element qualifies when its `className` is exactly one of the
 * `MsoListParagraphCxSp*` names, or when its inline style contains an
 * `mso-list: l<N> level<N> lfo<N>` declaration.
 *
 * @param el - Node to inspect
 * @returns `true` for a Word list element, otherwise `false`
 */
function isWordListElement(el) {
  if (!isElement(el)) {
    return false;
  }
  const wordListClassNames = [
    "MsoListParagraphCxSpFirst",
    "MsoListParagraphCxSpMiddle",
    "MsoListParagraphCxSpLast"
  ];
  if (wordListClassNames.includes(el.className)) {
    return true;
  }
  const inlineStyle = el.getAttribute("style");
  if (!inlineStyle) {
    return false;
  }
  return /mso-list:\s*l\d+\s+level\d+\s+lfo\d+/.test(inlineStyle);
}
1096
/**
 * Look up the block style associated with a heading element.
 *
 * @param el - Node whose tag name is looked up in `HTML_HEADER_TAGS`
 * @returns The `style` of the matching header entry, or `undefined` when the
 *   tag is not a known heading
 */
function getHeadingStyle(el) {
  const tag = tagName(el);
  if (!tag) {
    return undefined;
  }
  const headerDefinition = HTML_HEADER_TAGS[tag];
  return headerDefinition ? headerDefinition.style : undefined;
}
1101
/**
 * Build the rules for HTML pasted from Word.
 *
 * The single rule converts a `<p>` or heading element accepted by
 * `isWordListElement` into a list-item block, taking the list type and level
 * from the element's inline style and the block style from the heading tag
 * (falling back to `BLOCK_DEFAULT_STYLE`).
 *
 * @returns Array containing the Word list paragraph rule
 */
function createWordRules() {
  const wordListParagraphRule = {
    deserialize(el, next) {
      const tag = tagName(el);
      const isParagraphOrHeading = tag === "p" || HTML_HEADER_TAGS[tag || ""];
      if (!isParagraphOrHeading || !isWordListElement(el)) {
        return undefined;
      }
      const headingStyle = getHeadingStyle(el);
      return {
        ...DEFAULT_BLOCK,
        listItem: getListItemStyle(el),
        level: getListItemLevel(el),
        style: headingStyle || BLOCK_DEFAULT_STYLE,
        children: next(el.childNodes)
      };
    }
  };
  return [wordListParagraphRule];
}
1120
/**
 * Assemble the complete, ordered list of standard deserialization rules.
 *
 * Source-specific rule sets (Word, Word Online, Notion, Google Docs) come
 * before the generic HTML rules.
 *
 * @param schema - Schema forwarded to the rule factories that need it
 * @param options - Options forwarded to the Word Online and HTML factories
 * @returns Flat array of rules
 */
function createRules(schema, options) {
  const wordRules = createWordRules();
  const wordOnlineRules = createWordOnlineRules(schema, options);
  const notionRules = createNotionRules();
  const gDocsRules = createGDocsRules(schema);
  const htmlRules = createHTMLRules(schema, options);
  return [
    ...wordRules,
    ...wordOnlineRules,
    ...notionRules,
    ...gDocsRules,
    ...htmlRules
  ];
}
1129
/**
 * Trim whitespace inside text blocks and filter empty text blocks according
 * to `mode`.
 *
 * Every text block goes through `trimTextBlockWhitespace` regardless of mode.
 * - `"preserve"`: all blocks are kept.
 * - `"remove"`: empty text blocks are dropped.
 * - `"normalize"`: each run of consecutive empty text blocks is collapsed to
 *   its first block.
 * Any other mode keeps no blocks.
 *
 * @param context - Context passed to `isTextBlock` / `isEmptyTextBlock`
 * @param mode - Whitespace handling mode
 * @param blocks2 - Blocks to process
 * @returns New array with the retained blocks
 */
function trimWhitespace(context, mode, blocks2) {
  const kept = [];
  let emptyRunLength = 0;
  for (const candidate of blocks2) {
    const current = isTextBlock(context, candidate) ? trimTextBlockWhitespace(candidate) : candidate;
    switch (mode) {
      case "preserve":
        kept.push(current);
        break;
      case "remove":
        if (!isEmptyTextBlock(context, current)) {
          kept.push(current);
        }
        break;
      case "normalize":
        if (isEmptyTextBlock(context, current)) {
          emptyRunLength += 1;
          // Only the first empty block of a run survives.
          if (emptyRunLength === 1) {
            kept.push(current);
          }
        } else {
          kept.push(current);
          emptyRunLength = 0;
        }
        break;
      default:
        break;
    }
  }
  return kept;
}
1154
/**
 * Tell whether a block is a text block made up exclusively of spans whose
 * text is empty or whitespace-only.
 *
 * @param context - Context passed to `isTextBlock` / `isSpan`
 * @param block - Block to inspect
 * @returns `true` for an empty text block; `false` for non-text blocks and
 *   for text blocks containing a non-span child or any non-blank text
 */
function isEmptyTextBlock(context, block) {
  if (!isTextBlock(context, block)) {
    return false;
  }
  return block.children.every(
    (child) => isSpan(context, child) && child.text.trim() === ""
  );
}
1159
/**
 * Trim redundant horizontal whitespace from the spans of a text block.
 *
 * The block is modified in place: span texts are rewritten and spans may be
 * spliced out of `block.children` while the children are being iterated.
 * Newlines are never trimmed (`[^\S\n]` matches whitespace except "\n").
 *
 * @param block - Text block whose `children` are processed
 * @returns The same `block` instance
 */
function trimTextBlockWhitespace(block) {
  const leadingWhitespace = /^[^\S\n]+/g;
  const trailingWhitespace = /[^\S\n]+$/g;
  let index = 0;
  for (const child of block.children) {
    if (!isMinimalSpan(child)) {
      index++;
      continue;
    }
    const nextChild = nextSpan(block, index);
    const prevChild = prevSpan(block, index);

    // The first span loses leading whitespace, the last one trailing whitespace.
    if (index === 0) {
      child.text = child.text.replace(leadingWhitespace, "");
    }
    if (index === block.children.length - 1) {
      child.text = child.text.replace(trailingWhitespace, "");
    }

    // Whitespace on both sides of a span boundary: drop this span's side.
    if (/\s/.test(child.text.slice(Math.max(0, child.text.length - 1))) && nextChild && isMinimalSpan(nextChild) && /\s/.test(nextChild.text.slice(0, 1))) {
      child.text = child.text.replace(trailingWhitespace, "");
    }
    if (/\s/.test(child.text.slice(0, 1)) && prevChild && isMinimalSpan(prevChild) && /\s/.test(prevChild.text.slice(Math.max(0, prevChild.text.length - 1)))) {
      child.text = child.text.replace(leadingWhitespace, "");
    }

    // A span left without text is removed.
    if (!child.text) {
      block.children.splice(index, 1);
    }

    // A span that is a single whitespace character is folded into a
    // neighbouring span with equal marks (previous one preferred).
    if (prevChild && Array.isArray(prevChild.marks) && isEqualMarks(prevChild.marks, child.marks) && isWhiteSpaceChar(child.text)) {
      prevChild.text += " ";
      block.children.splice(index, 1);
    } else if (nextChild && Array.isArray(nextChild.marks) && isEqualMarks(nextChild.marks, child.marks) && isWhiteSpaceChar(child.text)) {
      nextChild.text = ` ${nextChild.text}`;
      block.children.splice(index, 1);
    }
    index++;
  }
  return block;
}
1171
/**
 * Get the child following `index` when it is a span.
 *
 * @param block - Block whose `children` are inspected
 * @param index - Index of the current child
 * @returns The next child if its `_type` is `"span"`, otherwise `null`
 */
function nextSpan(block, index) {
  const candidate = block.children[index + 1];
  if (!candidate || candidate._type !== "span") {
    return null;
  }
  return candidate;
}
1175
/**
 * Get the child preceding `index` when it is a span.
 *
 * @param block - Block whose `children` are inspected
 * @param index - Index of the current child
 * @returns The previous child if its `_type` is `"span"`, otherwise `null`
 */
function prevSpan(block, index) {
  const candidate = block.children[index - 1];
  if (!candidate || candidate._type !== "span") {
    return null;
  }
  return candidate;
}
1179
/**
 * Tell whether `text` is exactly one space or one non-breaking space.
 *
 * @param text - Value to compare
 * @returns `true` only for `" "` and `"\xA0"`
 */
function isWhiteSpaceChar(text) {
  return text === "\xA0" || text === " ";
}
1182
/**
 * Deserializes an HTML string into an array of blocks by running every DOM
 * node through an ordered list of rules (custom rules first, then the
 * standard rules from `createRules`).
 */
class HtmlDeserializer {
  keyGenerator;
  schema;
  rules;
  parseHtml;
  whitespaceMode;
  // Mark definitions collected from annotations during one `deserialize` run.
  _markDefs = [];
  /**
   * Create a new deserializer for the given schema.
   *
   * @param schema - Schema definition
   * @param options - Options for the deserialization process: `rules`
   *   (custom rules, tried before the standard ones), `whitespaceMode`
   *   (defaults to `"preserve"`), `keyGenerator`, `matchers`, `parseHtml`
   */
  constructor(schema, options = {}) {
    const { rules = [], whitespaceMode = "preserve" } = options;
    const standardRules = createRules(schema, {
      keyGenerator: options.keyGenerator,
      matchers: options.matchers
    });
    this.schema = schema;
    this.keyGenerator = options.keyGenerator ?? keyGenerator;
    this.rules = [...rules, ...standardRules];
    this.whitespaceMode = whitespaceMode;
    const parseHtml = options.parseHtml || defaultParseHtml();
    this.parseHtml = (html) => {
      const cleanHTML = vercelStegaClean(html);
      const doc = parseHtml(cleanHTML);
      for (const processor of preprocessors) {
        processor(cleanHTML, doc);
      }
      return doc.body;
    };
  }
  /**
   * Deserialize HTML.
   *
   * @param html - The HTML to deserialize, as a string
   * @returns Array of blocks - either portable text blocks or other allowed blocks
   */
  deserialize = (html) => {
    this._markDefs = [];
    const { parseHtml } = this;
    const fragment = parseHtml(html);
    const children = Array.from(fragment.childNodes);
    const blocks2 = trimWhitespace(
      { schema: this.schema },
      this.whitespaceMode,
      flattenNestedBlocks(
        { schema: this.schema },
        ensureRootIsBlocks(
          this.schema,
          this.deserializeElements(children)
        )
      )
    );
    if (this._markDefs.length > 0) {
      const textBlocks = blocks2.filter(
        (block) => isTextBlock({ schema: this.schema }, block)
      );
      for (const block of textBlocks) {
        // Collect the block's marks once, then attach the mark definitions
        // that one of its children actually references.
        const usedMarks = new Set(
          block.children.flatMap((child) => child.marks || [])
        );
        block.markDefs = (block.markDefs || []).concat(
          this._markDefs.filter((def) => usedMarks.has(def._key))
        );
      }
    }
    // Rules emit the generic "block" type; rename it to the schema's block type.
    return blocks2.map((block) => {
      if (block._type === "block") {
        block._type = this.schema.block.name;
      }
      return block;
    });
  };
  /**
   * Deserialize an array of DOM elements.
   *
   * @param elements - Array of DOM elements to deserialize
   * @returns Flat array with the results of all elements, in order
   */
  deserializeElements = (elements = []) => {
    let nodes = [];
    elements.forEach((element) => {
      nodes = nodes.concat(this.deserializeElement(element));
    });
    return nodes;
  };
  /**
   * Deserialize a DOM element by applying the first rule that returns a value.
   *
   * @param element - The DOM element to deserialize
   * @returns The rule's result (placeholders resolved); when no rule matches,
   *   the deserialized child nodes, or an empty array
   * @throws Error when a rule returns `null` or a value that is neither an
   *   array, an object nor `undefined`
   */
  deserializeElement = (element) => {
    const next = (elements) => {
      if (isNodeList(elements)) {
        return this.deserializeElements(Array.from(elements));
      }
      if (Array.isArray(elements)) {
        return this.deserializeElements(elements);
      }
      if (elements) {
        return this.deserializeElement(elements);
      }
      return undefined;
    };
    const block = (props) => ({
      _type: "__block",
      block: props
    });
    let node;
    for (const rule of this.rules) {
      if (!rule || !rule.deserialize) {
        continue;
      }
      const ret = rule.deserialize(element, next, block);
      const type = resolveJsType(ret);
      if (type !== "array" && type !== "object" && type !== "null" && type !== "undefined") {
        // Report the offending value; `node` is still unassigned here.
        throw new Error(
          `A rule returned an invalid deserialized representation: "${String(ret)}".`
        );
      }
      if (ret === undefined) {
        continue;
      }
      if (ret === null) {
        throw new Error("Deserializer rule returned `null`");
      }
      if (Array.isArray(ret)) {
        node = ret;
      } else if (isPlaceholderDecorator(ret)) {
        node = this.deserializeDecorator(ret);
      } else if (isPlaceholderAnnotation(ret)) {
        node = this.deserializeAnnotation(ret);
      } else {
        node = ret;
      }
      // List items: bump the level once per enclosing <li> found by walking
      // up two parents at a time.
      if (!Array.isArray(ret) && isMinimalBlock(ret) && "listItem" in ret) {
        let parent = element.parentNode?.parentNode;
        while (parent && tagName(parent) === "li") {
          parent = parent.parentNode?.parentNode;
          ret.level = ret.level ? ret.level + 1 : 1;
        }
      }
      // Blockquotes: "\r" marker spans become newlines; markers at either
      // end of the children are removed.
      if (!Array.isArray(ret) && isMinimalBlock(ret) && ret.style === "blockquote") {
        ret.children.forEach((child, index) => {
          if (isMinimalSpan(child) && child.text === "\r") {
            child.text = "\n";
            if (index === 0 || index === ret.children.length - 1) {
              ret.children.splice(index, 1);
            }
          }
        });
      }
      break;
    }
    return node || next(element.childNodes) || [];
  };
  /**
   * Deserialize a `__decorator` type
   * (an internal made up type to process decorators exclusively)
   *
   * @param decorator - Placeholder with the decorator `name` and `children`
   * @returns Array of the children, with the decorator name prepended to the
   *   marks of every span that has non-blank text
   */
  deserializeDecorator = (decorator) => {
    const { name } = decorator;
    const applyDecorator = (node) => {
      if (isPlaceholderDecorator(node)) {
        return this.deserializeDecorator(node);
      }
      if (isMinimalSpan(node)) {
        node.marks = node.marks || [];
        if (node.text.trim()) {
          node.marks.unshift(name);
        }
      } else if ("children" in node && Array.isArray(node.children)) {
        node.children = node.children.map(applyDecorator);
      }
      return node;
    };
    return decorator.children.reduce((children, node) => {
      const ret = applyDecorator(node);
      if (Array.isArray(ret)) {
        return children.concat(ret);
      }
      children.push(ret);
      return children;
    }, []);
  };
  /**
   * Deserialize a `__annotation` object.
   * (an internal made up type to process annotations exclusively)
   *
   * @param annotation - Placeholder with the `markDef` and `children`
   * @returns Array of the children, with the mark definition key prepended to
   *   the marks of every span that has non-blank text
   */
  deserializeAnnotation = (annotation) => {
    const { markDef } = annotation;
    this._markDefs.push(markDef);
    const applyAnnotation = (node) => {
      if (isPlaceholderAnnotation(node)) {
        return this.deserializeAnnotation(node);
      }
      if (isMinimalSpan(node)) {
        node.marks = node.marks || [];
        if (node.text.trim()) {
          node.marks.unshift(markDef._key);
        }
      } else if ("children" in node && Array.isArray(node.children)) {
        node.children = node.children.map(applyAnnotation);
      }
      return node;
    };
    return annotation.children.reduce((children, node) => {
      const ret = applyAnnotation(node);
      if (Array.isArray(ret)) {
        return children.concat(ret);
      }
      children.push(ret);
      return children;
    }, []);
  };
}
1345
/**
 * Normalize a deserialized node.
 *
 * Nodes that are not of the block type only receive a `_key` when they lack
 * one. Block nodes get a `_key`, default `children` / `markDefs`, adjacent
 * spans with equal marks merged, fresh keys on every child, marks filtered
 * against `allowedDecorators` (when given as an array) and `markDefs` reduced
 * to the ones still referenced.
 *
 * @param node - Node to normalize; its children are modified in place
 * @param options - `blockTypeName` (defaults to `"block"`), `keyGenerator`,
 *   `allowedDecorators`
 * @returns The original node (non-block with a key), a keyed copy (non-block
 *   without a key) or a new normalized block object
 * @throws Error `"missing child"` when a child is falsy
 */
function normalizeBlock(node, options = {}) {
  const generateKey = () => options.keyGenerator ? options.keyGenerator() : keyGenerator();
  const blockTypeName = options.blockTypeName || "block";
  // Minimal schema, only used for the `isSpan` checks below.
  const schema = {
    block: {
      name: blockTypeName
    },
    span: {
      name: "span"
    },
    styles: [],
    lists: [],
    decorators: [],
    annotations: [],
    blockObjects: [],
    inlineObjects: []
  };

  if (node._type !== blockTypeName) {
    if ("_key" in node) {
      return node;
    }
    return {
      ...node,
      _key: generateKey()
    };
  }

  const block = {
    _key: generateKey(),
    children: [],
    markDefs: [],
    ...node
  };
  const originalChildren = block.children;
  const lastChild = originalChildren[originalChildren.length - 1];
  if (!lastChild) {
    // A block always ends up with at least one (empty) span.
    block.children = [
      {
        _type: "span",
        _key: generateKey(),
        text: "",
        marks: []
      }
    ];
    return block;
  }

  const allowedDecorators = options.allowedDecorators && Array.isArray(options.allowedDecorators) ? options.allowedDecorators : false;

  // Pass 1: merge each span into the previous one when their marks are equal.
  const mergedChildren = [];
  originalChildren.forEach((child) => {
    const previousChild = mergedChildren[mergedChildren.length - 1];
    const isMergeable = previousChild && isSpan({ schema }, child) && isSpan({ schema }, previousChild) && isEqualMarks(previousChild.marks, child.marks);
    if (!isMergeable) {
      mergedChildren.push(child);
      return;
    }
    const isTrailingEmptySpan = lastChild && lastChild === child && child.text === "" && originalChildren.length > 1;
    if (!isTrailingEmptySpan) {
      previousChild.text += child.text;
    }
  });

  // Pass 2: assign keys, normalize marks and record which marks are in use.
  const usedMarkDefs = [];
  const normalizedChildren = mergedChildren.map((child) => {
    if (!child) {
      throw new Error("missing child");
    }
    child._key = generateKey();
    if (isSpan({ schema }, child)) {
      if (!child.marks) {
        child.marks = [];
      } else if (allowedDecorators) {
        child.marks = child.marks.filter((mark) => {
          const isAllowed = allowedDecorators.includes(mark);
          const isUsed = block.markDefs?.some((def) => def._key === mark);
          return isAllowed || isUsed;
        });
      }
      usedMarkDefs.push(...child.marks);
    }
    return child;
  });
  block.children = normalizedChildren;

  block.markDefs = (block.markDefs || []).filter(
    (markDef) => usedMarkDefs.includes(markDef._key)
  );
  return block;
}
1398
/**
 * Convert an HTML string into an array of normalized blocks.
 *
 * @param html - The HTML to convert
 * @param options - `schema` (defaults to `defaultSchema`), `keyGenerator`,
 *   `types` (object matchers, adapted through `toSchemaMatchers`), `rules`,
 *   `parseHtml`, `whitespaceMode`
 * @returns The deserialized blocks, each passed through `normalizeBlock`
 */
function htmlToPortableText(html, options = {}) {
  const schema = options.schema ?? defaultSchema;
  const { keyGenerator: keyGen } = options;
  const matchers = toSchemaMatchers(options.types);
  const deserializer = new HtmlDeserializer(schema, {
    keyGenerator: keyGen,
    rules: options.rules,
    parseHtml: options.parseHtml,
    whitespaceMode: options.whitespaceMode,
    matchers
  });
  const blocks = deserializer.deserialize(html);
  return blocks.map((block) => normalizeBlock(block, { keyGenerator: keyGen }));
}
1408
/**
 * Adapt a `types.image` object matcher to the `image` / `inlineImage`
 * matcher pair used by the deserializer rules.
 *
 * @param types - Optional object that may hold an `image` matcher
 * @returns `undefined` when there is no image matcher; otherwise
 *   `{ image, inlineImage }`, where each adapter calls the matcher with
 *   `{ context, value: props, isInline }` and maps falsy results to
 *   `undefined`
 */
function toSchemaMatchers(types) {
  if (!types?.image) {
    return undefined;
  }
  const objectMatcher = types.image;
  const createAdapter = (isInline) => {
    return ({ context, props }) => {
      const matched = objectMatcher({
        context,
        value: props,
        isInline
      });
      return matched ? matched : undefined;
    };
  };
  return {
    image: createAdapter(false),
    inlineImage: createAdapter(true)
  };
}
1425
/**
 * Create an object matcher for a schema definition.
 *
 * The returned matcher looks up `definition.name` among the schema's inline
 * objects (when `isInline`) or block objects, and builds an object holding a
 * generated `_key`, the schema `_type` and every declared field for which
 * `value` has a defined entry.
 *
 * @param definition - Object with the `name` to look up in the schema
 * @returns Matcher taking `{ context, value, isInline }`; it returns
 *   `undefined` when the schema does not declare the object
 */
function buildObjectMatcher(definition) {
  return ({ context, value, isInline }) => {
    const declaredObjects = isInline ? context.schema.inlineObjects : context.schema.blockObjects;
    const schemaDefinition = declaredObjects.find(
      (item) => item.name === definition.name
    );
    if (!schemaDefinition) {
      return undefined;
    }
    // Keep only the fields the schema declares and the value actually defines.
    const pickedFields = {};
    schemaDefinition.fields.forEach((field) => {
      const fieldValue = value[field.name];
      if (fieldValue !== undefined) {
        pickedFields[field.name] = fieldValue;
      }
    });
    return {
      _key: context.keyGenerator(),
      _type: schemaDefinition.name,
      ...pickedFields
    };
  };
}
1443
+ export {
1444
+ buildObjectMatcher,
1445
+ defaultSchema,
1446
+ htmlToPortableText
1447
+ };
1448
+ //# sourceMappingURL=index.js.map