@portabletext/block-tools 3.4.1 → 3.5.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/lib/index.js CHANGED
@@ -2,8 +2,8 @@ import { sanitySchemaToPortableTextSchema } from "@portabletext/sanity-bridge";
2
2
  import { isTextBlock, isSpan } from "@portabletext/schema";
3
3
  import flatten from "lodash/flatten.js";
4
4
  import getRandomValues from "get-random-values-esm";
5
+ import { isElement, tagName, DEFAULT_SPAN, DEFAULT_BLOCK, HTML_HEADER_TAGS, HTML_BLOCK_TAGS, BLOCK_DEFAULT_STYLE, HTML_LIST_CONTAINER_TAGS, HTML_SPAN_TAGS, HTML_LIST_ITEM_TAGS, HTML_DECORATOR_TAGS, defaultParseHtml, preprocess, trimWhitespace, flattenNestedBlocks, ensureRootIsBlocks, resolveJsType, isPlaceholderDecorator, isPlaceholderAnnotation, isMinimalBlock, isMinimalSpan, isNodeList } from "./_chunks-es/helpers.js";
5
6
  import isEqual from "lodash/isEqual.js";
6
- import uniq from "lodash/uniq.js";
7
7
  function keyGenerator() {
8
8
  return randomKey(12);
9
9
  }
@@ -17,456 +17,6 @@ for (let i = 0; i < 256; ++i)
17
17
  function randomKey(length) {
18
18
  return whatwgRNG(length).reduce((str, n) => str + byteToHex[n], "").slice(0, length);
19
19
  }
20
- const objectToString = Object.prototype.toString;
21
- function resolveJsType(val) {
22
- switch (objectToString.call(val)) {
23
- case "[object Function]":
24
- return "function";
25
- case "[object Date]":
26
- return "date";
27
- case "[object RegExp]":
28
- return "regexp";
29
- case "[object Arguments]":
30
- return "arguments";
31
- case "[object Array]":
32
- return "array";
33
- case "[object String]":
34
- return "string";
35
- }
36
- return val === null ? "null" : val === void 0 ? "undefined" : val && typeof val == "object" && "nodeType" in val && val.nodeType === 1 ? "element" : val === Object(val) ? "object" : typeof val;
37
- }
38
- function isArbitraryTypedObject(object) {
39
- return isRecord(object) && typeof object._type == "string";
40
- }
41
- function isRecord(value) {
42
- return !!value && (typeof value == "object" || typeof value == "function");
43
- }
44
- function flattenNestedBlocks(context, blocks2) {
45
- return blocks2.flatMap((block) => {
46
- if (isBlockContainer(block))
47
- return flattenNestedBlocks(context, [block.block]);
48
- if (isTextBlock(context, block)) {
49
- const hasBlockObjects = block.children.some((child) => context.schema.blockObjects.some(
50
- (blockObject) => blockObject.name === child._type
51
- )), hasBlocks = block.children.some(
52
- (child) => child._type === "__block" || child._type === "block"
53
- );
54
- if (hasBlockObjects || hasBlocks) {
55
- const splitChildren = getSplitChildren(context, block);
56
- return splitChildren.length === 1 && splitChildren[0].type === "children" && isEqual(splitChildren[0].children, block.children) ? [block] : splitChildren.flatMap((slice) => slice.type === "block object" ? [slice.block] : slice.type === "block" ? flattenNestedBlocks(context, [
57
- slice.block
58
- ]) : slice.children.length > 0 ? slice.children.every(
59
- (child) => isSpan(context, child) && child.text.trim() === ""
60
- ) ? [] : flattenNestedBlocks(context, [
61
- {
62
- ...block,
63
- children: slice.children
64
- }
65
- ]) : []);
66
- }
67
- return [block];
68
- }
69
- return [block];
70
- });
71
- }
72
- function isBlockContainer(block) {
73
- return block._type === "__block" && isArbitraryTypedObject(block.block);
74
- }
75
- function getSplitChildren(context, block) {
76
- return block.children.reduce(
77
- (slices, child) => {
78
- const knownInlineObject = context.schema.inlineObjects.some(
79
- (inlineObject) => inlineObject.name === child._type
80
- ), knownBlockObject = context.schema.blockObjects.some(
81
- (blockObject) => blockObject.name === child._type
82
- ), lastSlice = slices.pop();
83
- return !isSpan(context, child) && !knownInlineObject && knownBlockObject ? [
84
- ...slices,
85
- ...lastSlice ? [lastSlice] : [],
86
- { type: "block object", block: child }
87
- ] : child._type === "__block" ? [
88
- ...slices,
89
- ...lastSlice ? [lastSlice] : [],
90
- {
91
- type: "block object",
92
- block: child.block
93
- }
94
- ] : child._type === "block" ? [
95
- ...slices,
96
- ...lastSlice ? [lastSlice] : [],
97
- { type: "block", block: child }
98
- ] : lastSlice && lastSlice.type === "children" ? [
99
- ...slices,
100
- {
101
- type: "children",
102
- children: [...lastSlice.children, child]
103
- }
104
- ] : [
105
- ...slices,
106
- ...lastSlice ? [lastSlice] : [],
107
- { type: "children", children: [child] }
108
- ];
109
- },
110
- []
111
- );
112
- }
113
- var s = { 0: 8203, 1: 8204, 2: 8205, 3: 8290, 4: 8291, 5: 8288, 6: 65279, 7: 8289, 8: 119155, 9: 119156, a: 119157, b: 119158, c: 119159, d: 119160, e: 119161, f: 119162 }, c = { 0: 8203, 1: 8204, 2: 8205, 3: 65279 };
114
- new Array(4).fill(String.fromCodePoint(c[0])).join("");
115
- Object.fromEntries(Object.entries(c).map((t) => t.reverse()));
116
- Object.fromEntries(Object.entries(s).map((t) => t.reverse()));
117
- var S = `${Object.values(s).map((t) => `\\u{${t.toString(16)}}`).join("")}`, f = new RegExp(`[${S}]{4,}`, "gu");
118
- function _(t) {
119
- var e;
120
- return { cleaned: t.replace(f, ""), encoded: ((e = t.match(f)) == null ? void 0 : e[0]) || "" };
121
- }
122
- function O(t) {
123
- return t && JSON.parse(_(JSON.stringify(t)).cleaned);
124
- }
125
- const PRESERVE_WHITESPACE_TAGS = ["pre", "textarea", "code"], BLOCK_DEFAULT_STYLE = "normal", DEFAULT_BLOCK = Object.freeze({
126
- _type: "block",
127
- markDefs: [],
128
- style: BLOCK_DEFAULT_STYLE
129
- }), DEFAULT_SPAN = Object.freeze({
130
- _type: "span",
131
- marks: []
132
- }), HTML_BLOCK_TAGS = {
133
- p: DEFAULT_BLOCK,
134
- blockquote: { ...DEFAULT_BLOCK, style: "blockquote" }
135
- }, HTML_SPAN_TAGS = {
136
- span: { object: "text" }
137
- }, HTML_LIST_CONTAINER_TAGS = {
138
- ol: { object: null },
139
- ul: { object: null }
140
- }, HTML_HEADER_TAGS = {
141
- h1: { ...DEFAULT_BLOCK, style: "h1" },
142
- h2: { ...DEFAULT_BLOCK, style: "h2" },
143
- h3: { ...DEFAULT_BLOCK, style: "h3" },
144
- h4: { ...DEFAULT_BLOCK, style: "h4" },
145
- h5: { ...DEFAULT_BLOCK, style: "h5" },
146
- h6: { ...DEFAULT_BLOCK, style: "h6" }
147
- }, HTML_MISC_TAGS = {
148
- br: { ...DEFAULT_BLOCK, style: BLOCK_DEFAULT_STYLE }
149
- }, HTML_DECORATOR_TAGS = {
150
- b: "strong",
151
- strong: "strong",
152
- i: "em",
153
- em: "em",
154
- u: "underline",
155
- s: "strike-through",
156
- strike: "strike-through",
157
- del: "strike-through",
158
- code: "code",
159
- sup: "sup",
160
- sub: "sub",
161
- ins: "ins",
162
- mark: "mark",
163
- small: "small"
164
- }, HTML_LIST_ITEM_TAGS = {
165
- li: {
166
- ...DEFAULT_BLOCK,
167
- style: BLOCK_DEFAULT_STYLE,
168
- level: 1,
169
- listItem: "bullet"
170
- }
171
- }, ELEMENT_MAP = {
172
- ...HTML_BLOCK_TAGS,
173
- ...HTML_SPAN_TAGS,
174
- ...HTML_LIST_CONTAINER_TAGS,
175
- ...HTML_LIST_ITEM_TAGS,
176
- ...HTML_HEADER_TAGS,
177
- ...HTML_MISC_TAGS
178
- };
179
- uniq(
180
- Object.values(ELEMENT_MAP).filter((tag) => "style" in tag).map((tag) => tag.style)
181
- );
182
- uniq(
183
- Object.values(HTML_DECORATOR_TAGS)
184
- );
185
- const _XPathResult = {
186
- BOOLEAN_TYPE: 3,
187
- ORDERED_NODE_ITERATOR_TYPE: 5,
188
- UNORDERED_NODE_SNAPSHOT_TYPE: 6
189
- };
190
- var preprocessGDocs = (_html, doc, options) => {
191
- const whitespaceOnPasteMode = options?.unstable_whitespaceOnPasteMode || "preserve";
192
- let gDocsRootOrSiblingNode = doc.evaluate(
193
- '//*[@id and contains(@id, "docs-internal-guid")]',
194
- doc,
195
- null,
196
- _XPathResult.ORDERED_NODE_ITERATOR_TYPE,
197
- null
198
- ).iterateNext();
199
- if (gDocsRootOrSiblingNode) {
200
- const isWrappedRootTag = tagName(gDocsRootOrSiblingNode) === "b";
201
- switch (isWrappedRootTag || (gDocsRootOrSiblingNode = doc.body), whitespaceOnPasteMode) {
202
- case "normalize":
203
- normalizeWhitespace(gDocsRootOrSiblingNode);
204
- break;
205
- case "remove":
206
- removeAllWhitespace(gDocsRootOrSiblingNode);
207
- break;
208
- }
209
- const childNodes = doc.evaluate(
210
- "//*",
211
- doc,
212
- null,
213
- _XPathResult.UNORDERED_NODE_SNAPSHOT_TYPE,
214
- null
215
- );
216
- for (let i = childNodes.snapshotLength - 1; i >= 0; i--) {
217
- const elm = childNodes.snapshotItem(i);
218
- elm?.setAttribute("data-is-google-docs", "true"), (elm?.parentElement === gDocsRootOrSiblingNode || !isWrappedRootTag && elm.parentElement === doc.body) && (elm?.setAttribute("data-is-root-node", "true"), tagName(elm)), tagName(elm) === "li" && elm.firstChild && tagName(elm?.firstChild) === "img" && elm.removeChild(elm.firstChild);
219
- }
220
- return isWrappedRootTag && doc.body.firstElementChild?.replaceWith(
221
- ...Array.from(gDocsRootOrSiblingNode.childNodes)
222
- ), doc;
223
- }
224
- return doc;
225
- };
226
- const unwantedWordDocumentPaths = [
227
- "/html/text()",
228
- "/html/head/text()",
229
- "/html/body/text()",
230
- "/html/body/ul/text()",
231
- "/html/body/ol/text()",
232
- "//comment()",
233
- "//style",
234
- "//xml",
235
- "//script",
236
- "//meta",
237
- "//link"
238
- ];
239
- var preprocessHTML = (_html, doc) => {
240
- const bodyTextNodes = doc.evaluate(
241
- "/html/body/text()",
242
- doc,
243
- null,
244
- _XPathResult.UNORDERED_NODE_SNAPSHOT_TYPE,
245
- null
246
- );
247
- for (let i = bodyTextNodes.snapshotLength - 1; i >= 0; i--) {
248
- const node = bodyTextNodes.snapshotItem(i), text = node.textContent || "";
249
- if (text.replace(/[^\S\n]+$/g, "")) {
250
- const newNode = doc.createElement("span");
251
- newNode.appendChild(doc.createTextNode(text)), node.parentNode?.replaceChild(newNode, node);
252
- } else
253
- node.parentNode?.removeChild(node);
254
- }
255
- const unwantedNodes = doc.evaluate(
256
- unwantedWordDocumentPaths.join("|"),
257
- doc,
258
- null,
259
- _XPathResult.UNORDERED_NODE_SNAPSHOT_TYPE,
260
- null
261
- );
262
- for (let i = unwantedNodes.snapshotLength - 1; i >= 0; i--) {
263
- const unwanted = unwantedNodes.snapshotItem(i);
264
- unwanted && unwanted.parentNode?.removeChild(unwanted);
265
- }
266
- return doc;
267
- }, preprocessNotion = (html, doc) => {
268
- const NOTION_REGEX = /<!-- notionvc:.*?-->/g;
269
- if (html.match(NOTION_REGEX)) {
270
- const childNodes = doc.evaluate(
271
- "//*",
272
- doc,
273
- null,
274
- _XPathResult.UNORDERED_NODE_SNAPSHOT_TYPE,
275
- null
276
- );
277
- for (let i = childNodes.snapshotLength - 1; i >= 0; i--)
278
- childNodes.snapshotItem(i)?.setAttribute("data-is-notion", "true");
279
- return doc;
280
- }
281
- return doc;
282
- }, preprocessWhitespace = (_2, doc) => {
283
- function processNode(node) {
284
- if (node.nodeType === _XPathResult.BOOLEAN_TYPE && !PRESERVE_WHITESPACE_TAGS.includes(
285
- node.parentElement?.tagName.toLowerCase() || ""
286
- ))
287
- node.textContent = node.textContent?.replace(/\s\s+/g, " ").replace(/[\r\n]+/g, " ") || "";
288
- else
289
- for (let i = 0; i < node.childNodes.length; i++)
290
- processNode(node.childNodes[i]);
291
- }
292
- return processNode(doc.body), doc;
293
- };
294
- const WORD_HTML_REGEX = /(class="?Mso|style=(?:"|')[^"]*?\bmso-|w:WordDocument|<o:\w+>|<\/font>)/, unwantedPaths = [
295
- "//o:p",
296
- "//span[@style='mso-list:Ignore']",
297
- "//span[@style='mso-list: Ignore']"
298
- ], mappedPaths = [
299
- "//p[@class='MsoTocHeading']",
300
- "//p[@class='MsoTitle']",
301
- "//p[@class='MsoToaHeading']",
302
- "//p[@class='MsoSubtitle']",
303
- "//span[@class='MsoSubtleEmphasis']",
304
- "//span[@class='MsoIntenseEmphasis']"
305
- ], elementMap = {
306
- MsoTocHeading: ["h3"],
307
- MsoTitle: ["h1"],
308
- MsoToaHeading: ["h2"],
309
- MsoSubtitle: ["h5"],
310
- MsoSubtleEmphasis: ["span", "em"],
311
- MsoIntenseEmphasis: ["span", "em", "strong"]
312
- // Remove cruft
313
- };
314
- function isWordHtml(html) {
315
- return WORD_HTML_REGEX.test(html);
316
- }
317
- var preprocessWord = (html, doc) => {
318
- if (!isWordHtml(html))
319
- return doc;
320
- const unwantedNodes = doc.evaluate(
321
- unwantedPaths.join("|"),
322
- doc,
323
- (prefix) => prefix === "o" ? "urn:schemas-microsoft-com:office:office" : null,
324
- _XPathResult.UNORDERED_NODE_SNAPSHOT_TYPE,
325
- null
326
- );
327
- for (let i = unwantedNodes.snapshotLength - 1; i >= 0; i--) {
328
- const unwanted = unwantedNodes.snapshotItem(i);
329
- unwanted?.parentNode && unwanted.parentNode.removeChild(unwanted);
330
- }
331
- const mappedElements = doc.evaluate(
332
- mappedPaths.join("|"),
333
- doc,
334
- null,
335
- _XPathResult.UNORDERED_NODE_SNAPSHOT_TYPE,
336
- null
337
- );
338
- for (let i = mappedElements.snapshotLength - 1; i >= 0; i--) {
339
- const mappedElm = mappedElements.snapshotItem(i), tags = elementMap[mappedElm.className], text = doc.createTextNode(mappedElm.textContent || "");
340
- if (!tags)
341
- continue;
342
- const parentElement = doc.createElement(tags[0]);
343
- let parent = parentElement, child = parentElement;
344
- tags.slice(1).forEach((tag) => {
345
- child = doc.createElement(tag), parent.appendChild(child), parent = child;
346
- }), child.appendChild(text), mappedElm?.parentNode?.replaceChild(parentElement, mappedElm);
347
- }
348
- return doc;
349
- }, preprocessors = [
350
- preprocessWhitespace,
351
- preprocessNotion,
352
- preprocessWord,
353
- preprocessGDocs,
354
- preprocessHTML
355
- ];
356
- function tagName(el) {
357
- if (el && "tagName" in el)
358
- return el.tagName.toLowerCase();
359
- }
360
- function preprocess(html, parseHtml, options) {
361
- const cleanHTML = O(html), doc = parseHtml(normalizeHtmlBeforePreprocess(cleanHTML));
362
- return preprocessors.forEach((processor) => {
363
- processor(cleanHTML, doc, options);
364
- }), doc;
365
- }
366
- function normalizeHtmlBeforePreprocess(html) {
367
- return html.trim();
368
- }
369
- function defaultParseHtml() {
370
- if (resolveJsType(DOMParser) === "undefined")
371
- throw new Error(
372
- "The native `DOMParser` global which the `Html` deserializer uses by default is not present in this environment. You must supply the `options.parseHtml` function instead."
373
- );
374
- return (html) => new DOMParser().parseFromString(html, "text/html");
375
- }
376
- function nextSpan(block, index) {
377
- const next = block.children[index + 1];
378
- return next && next._type === "span" ? next : null;
379
- }
380
- function prevSpan(block, index) {
381
- const prev = block.children[index - 1];
382
- return prev && prev._type === "span" ? prev : null;
383
- }
384
- function isWhiteSpaceChar(text) {
385
- return ["\xA0", " "].includes(text);
386
- }
387
- function trimWhitespace(schema, blocks2) {
388
- return blocks2.forEach((block) => {
389
- isTextBlock({ schema }, block) && block.children.forEach((child, index) => {
390
- if (!isMinimalSpan(child))
391
- return;
392
- const nextChild = nextSpan(block, index), prevChild = prevSpan(block, index);
393
- index === 0 && (child.text = child.text.replace(/^[^\S\n]+/g, "")), index === block.children.length - 1 && (child.text = child.text.replace(/[^\S\n]+$/g, "")), /\s/.test(child.text.slice(Math.max(0, child.text.length - 1))) && nextChild && isMinimalSpan(nextChild) && /\s/.test(nextChild.text.slice(0, 1)) && (child.text = child.text.replace(/[^\S\n]+$/g, "")), /\s/.test(child.text.slice(0, 1)) && prevChild && isMinimalSpan(prevChild) && /\s/.test(prevChild.text.slice(Math.max(0, prevChild.text.length - 1))) && (child.text = child.text.replace(/^[^\S\n]+/g, "")), child.text || block.children.splice(index, 1), prevChild && isEqual(prevChild.marks, child.marks) && isWhiteSpaceChar(child.text) ? (prevChild.text += " ", block.children.splice(index, 1)) : nextChild && isEqual(nextChild.marks, child.marks) && isWhiteSpaceChar(child.text) && (nextChild.text = ` ${nextChild.text}`, block.children.splice(index, 1));
394
- });
395
- }), blocks2;
396
- }
397
- function ensureRootIsBlocks(schema, objects) {
398
- return objects.reduce((blocks2, node, i, original) => {
399
- if (node._type === "block")
400
- return blocks2.push(node), blocks2;
401
- if (node._type === "__block")
402
- return blocks2.push(node.block), blocks2;
403
- const lastBlock = blocks2[blocks2.length - 1];
404
- if (i > 0 && !isTextBlock({ schema }, original[i - 1]) && isTextBlock({ schema }, lastBlock))
405
- return lastBlock.children.push(node), blocks2;
406
- const block = {
407
- ...DEFAULT_BLOCK,
408
- children: [node]
409
- };
410
- return blocks2.push(block), blocks2;
411
- }, []);
412
- }
413
- function isNodeList(node) {
414
- return Object.prototype.toString.call(node) === "[object NodeList]";
415
- }
416
- function isMinimalSpan(node) {
417
- return node._type === "span";
418
- }
419
- function isMinimalBlock(node) {
420
- return node._type === "block";
421
- }
422
- function isPlaceholderDecorator(node) {
423
- return node._type === "__decorator";
424
- }
425
- function isPlaceholderAnnotation(node) {
426
- return node._type === "__annotation";
427
- }
428
- function isElement(node) {
429
- return node.nodeType === 1;
430
- }
431
- function normalizeWhitespace(rootNode) {
432
- let emptyBlockCount = 0, lastParent = null;
433
- const nodesToRemove = [];
434
- for (let child = rootNode.firstChild; child; child = child.nextSibling) {
435
- if (!isElement(child)) {
436
- normalizeWhitespace(child), emptyBlockCount = 0;
437
- continue;
438
- }
439
- const elm = child;
440
- isWhitespaceBlock(elm) ? (lastParent && elm.parentElement === lastParent ? (emptyBlockCount++, emptyBlockCount > 1 && nodesToRemove.push(elm)) : emptyBlockCount = 1, lastParent = elm.parentElement) : (normalizeWhitespace(child), emptyBlockCount = 0);
441
- }
442
- nodesToRemove.forEach((node) => {
443
- node.parentElement?.removeChild(node);
444
- });
445
- }
446
- function removeAllWhitespace(rootNode) {
447
- const nodesToRemove = [];
448
- function collectNodesToRemove(currentNode) {
449
- if (isElement(currentNode)) {
450
- const elm = currentNode;
451
- if (tagName(elm) === "br" && (tagName(elm.nextElementSibling) === "p" || tagName(elm.previousElementSibling) === "p")) {
452
- nodesToRemove.push(elm);
453
- return;
454
- }
455
- if ((tagName(elm) === "p" || tagName(elm) === "br") && elm?.firstChild?.textContent?.trim() === "") {
456
- nodesToRemove.push(elm);
457
- return;
458
- }
459
- for (let child = elm.firstChild; child; child = child.nextSibling)
460
- collectNodesToRemove(child);
461
- }
462
- }
463
- collectNodesToRemove(rootNode), nodesToRemove.forEach((node) => {
464
- node.parentElement?.removeChild(node);
465
- });
466
- }
467
- function isWhitespaceBlock(elm) {
468
- return ["p", "br"].includes(tagName(elm) || "") && !elm.textContent?.trim();
469
- }
470
20
  const LIST_CONTAINER_TAGS = Object.keys(HTML_LIST_CONTAINER_TAGS);
471
21
  function isEmphasis$1(el) {
472
22
  const style = isElement(el) && el.getAttribute("style");
@@ -515,7 +65,7 @@ const blocks = {
515
65
  };
516
66
  function getBlockStyle(schema, el) {
517
67
  const childTag = tagName(el.firstChild), block = childTag && blocks[childTag];
518
- return block && schema.styles.some((style) => style.name === block.style) ? block.style : BLOCK_DEFAULT_STYLE;
68
+ return block ? schema.styles.some((style) => style.name === block.style) ? block.style : BLOCK_DEFAULT_STYLE : BLOCK_DEFAULT_STYLE;
519
69
  }
520
70
  function createGDocsRules(schema) {
521
71
  return [
@@ -911,7 +461,7 @@ class HtmlDeserializer {
911
461
  const { parseHtml } = this, fragment = parseHtml(html), children = Array.from(fragment.childNodes), blocks2 = trimWhitespace(
912
462
  this.schema,
913
463
  flattenNestedBlocks(
914
- { schema: this.schema, keyGenerator: this.keyGenerator },
464
+ { schema: this.schema },
915
465
  ensureRootIsBlocks(
916
466
  this.schema,
917
467
  this.deserializeElements(children)