@portabletext/block-tools 4.0.2 → 4.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/lib/_chunks-es/helpers.js +1 -64
- package/lib/_chunks-es/helpers.js.map +1 -1
- package/lib/index.js +487 -38
- package/lib/index.js.map +1 -1
- package/package.json +9 -9
- package/src/HtmlDeserializer/helpers.ts +1 -183
- package/src/HtmlDeserializer/index.ts +14 -25
- package/src/HtmlDeserializer/preprocessors/index.ts +8 -6
- package/src/HtmlDeserializer/preprocessors/{gdocs.ts → preprocessor.gdocs.ts} +2 -22
- package/src/HtmlDeserializer/preprocessors/{html.ts → preprocessor.html.ts} +1 -1
- package/src/HtmlDeserializer/preprocessors/{notion.ts → preprocessor.notion.ts} +1 -1
- package/src/HtmlDeserializer/preprocessors/{whitespace.ts → preprocessor.whitespace.ts} +28 -3
- package/src/HtmlDeserializer/preprocessors/{word.ts → preprocessor.word.ts} +1 -1
- package/src/HtmlDeserializer/rules/index.ts +6 -4
- package/src/HtmlDeserializer/rules/{gdocs.ts → rules.gdocs.ts} +1 -1
- package/src/HtmlDeserializer/rules/{html.ts → rules.html.ts} +3 -3
- package/src/HtmlDeserializer/rules/{notion.ts → rules.notion.ts} +1 -1
- package/src/HtmlDeserializer/rules/rules.word.ts +95 -0
- package/src/HtmlDeserializer/trim-whitespace.ts +157 -0
- package/src/HtmlDeserializer/word-online/asserters.word-online.ts +153 -0
- package/src/HtmlDeserializer/word-online/preprocessor.word-online.ts +263 -0
- package/src/HtmlDeserializer/word-online/rules.word-online.ts +390 -0
- package/src/HtmlDeserializer/rules/word.ts +0 -59
- /package/src/HtmlDeserializer/rules/{whitespace-text-node.ts → rules.whitespace-text-node.ts} +0 -0
|
@@ -1,10 +1,5 @@
|
|
|
1
1
|
import type {Schema} from '@portabletext/schema'
|
|
2
|
-
import {
|
|
3
|
-
isTextBlock,
|
|
4
|
-
type PortableTextObject,
|
|
5
|
-
type PortableTextTextBlock,
|
|
6
|
-
} from '@portabletext/schema'
|
|
7
|
-
import {isEqual} from 'lodash'
|
|
2
|
+
import {isTextBlock, type PortableTextObject} from '@portabletext/schema'
|
|
8
3
|
import {DEFAULT_BLOCK} from '../constants'
|
|
9
4
|
import type {
|
|
10
5
|
ArbitraryTypedObject,
|
|
@@ -49,88 +44,6 @@ export function defaultParseHtml(): HtmlParser {
|
|
|
49
44
|
}
|
|
50
45
|
}
|
|
51
46
|
|
|
52
|
-
function nextSpan(block: PortableTextTextBlock, index: number) {
|
|
53
|
-
const next = block.children[index + 1]
|
|
54
|
-
return next && next._type === 'span' ? next : null
|
|
55
|
-
}
|
|
56
|
-
|
|
57
|
-
function prevSpan(block: PortableTextTextBlock, index: number) {
|
|
58
|
-
const prev = block.children[index - 1]
|
|
59
|
-
return prev && prev._type === 'span' ? prev : null
|
|
60
|
-
}
|
|
61
|
-
|
|
62
|
-
function isWhiteSpaceChar(text: string) {
|
|
63
|
-
return ['\xa0', ' '].includes(text)
|
|
64
|
-
}
|
|
65
|
-
|
|
66
|
-
/**
|
|
67
|
-
* NOTE: _mutates_ passed blocks!
|
|
68
|
-
*
|
|
69
|
-
* @param blocks - Array of blocks to trim whitespace for
|
|
70
|
-
* @returns
|
|
71
|
-
*/
|
|
72
|
-
export function trimWhitespace(
|
|
73
|
-
schema: Schema,
|
|
74
|
-
blocks: TypedObject[],
|
|
75
|
-
): TypedObject[] {
|
|
76
|
-
blocks.forEach((block) => {
|
|
77
|
-
if (!isTextBlock({schema}, block)) {
|
|
78
|
-
return
|
|
79
|
-
}
|
|
80
|
-
|
|
81
|
-
// eslint-disable-next-line complexity
|
|
82
|
-
block.children.forEach((child, index) => {
|
|
83
|
-
if (!isMinimalSpan(child)) {
|
|
84
|
-
return
|
|
85
|
-
}
|
|
86
|
-
const nextChild = nextSpan(block, index)
|
|
87
|
-
const prevChild = prevSpan(block, index)
|
|
88
|
-
if (index === 0) {
|
|
89
|
-
child.text = child.text.replace(/^[^\S\n]+/g, '')
|
|
90
|
-
}
|
|
91
|
-
if (index === block.children.length - 1) {
|
|
92
|
-
child.text = child.text.replace(/[^\S\n]+$/g, '')
|
|
93
|
-
}
|
|
94
|
-
if (
|
|
95
|
-
/\s/.test(child.text.slice(Math.max(0, child.text.length - 1))) &&
|
|
96
|
-
nextChild &&
|
|
97
|
-
isMinimalSpan(nextChild) &&
|
|
98
|
-
/\s/.test(nextChild.text.slice(0, 1))
|
|
99
|
-
) {
|
|
100
|
-
child.text = child.text.replace(/[^\S\n]+$/g, '')
|
|
101
|
-
}
|
|
102
|
-
if (
|
|
103
|
-
/\s/.test(child.text.slice(0, 1)) &&
|
|
104
|
-
prevChild &&
|
|
105
|
-
isMinimalSpan(prevChild) &&
|
|
106
|
-
/\s/.test(prevChild.text.slice(Math.max(0, prevChild.text.length - 1)))
|
|
107
|
-
) {
|
|
108
|
-
child.text = child.text.replace(/^[^\S\n]+/g, '')
|
|
109
|
-
}
|
|
110
|
-
if (!child.text) {
|
|
111
|
-
block.children.splice(index, 1)
|
|
112
|
-
}
|
|
113
|
-
if (
|
|
114
|
-
prevChild &&
|
|
115
|
-
isEqual(prevChild.marks, child.marks) &&
|
|
116
|
-
isWhiteSpaceChar(child.text)
|
|
117
|
-
) {
|
|
118
|
-
prevChild.text += ' '
|
|
119
|
-
block.children.splice(index, 1)
|
|
120
|
-
} else if (
|
|
121
|
-
nextChild &&
|
|
122
|
-
isEqual(nextChild.marks, child.marks) &&
|
|
123
|
-
isWhiteSpaceChar(child.text)
|
|
124
|
-
) {
|
|
125
|
-
nextChild.text = ` ${nextChild.text}`
|
|
126
|
-
block.children.splice(index, 1)
|
|
127
|
-
}
|
|
128
|
-
})
|
|
129
|
-
})
|
|
130
|
-
|
|
131
|
-
return blocks
|
|
132
|
-
}
|
|
133
|
-
|
|
134
47
|
export function ensureRootIsBlocks(
|
|
135
48
|
schema: Schema,
|
|
136
49
|
objects: Array<ArbitraryTypedObject>,
|
|
@@ -193,98 +106,3 @@ export function isPlaceholderAnnotation(
|
|
|
193
106
|
export function isElement(node: Node): node is Element {
|
|
194
107
|
return node.nodeType === 1
|
|
195
108
|
}
|
|
196
|
-
|
|
197
|
-
/**
|
|
198
|
-
* Helper to normalize whitespace to only 1 empty block between content nodes
|
|
199
|
-
* @param node - Root node to process
|
|
200
|
-
*/
|
|
201
|
-
export function normalizeWhitespace(rootNode: Node) {
|
|
202
|
-
let emptyBlockCount = 0
|
|
203
|
-
let lastParent = null
|
|
204
|
-
const nodesToRemove: Node[] = []
|
|
205
|
-
|
|
206
|
-
for (let child = rootNode.firstChild; child; child = child.nextSibling) {
|
|
207
|
-
if (!isElement(child)) {
|
|
208
|
-
normalizeWhitespace(child)
|
|
209
|
-
emptyBlockCount = 0
|
|
210
|
-
continue
|
|
211
|
-
}
|
|
212
|
-
|
|
213
|
-
const elm = child as HTMLElement
|
|
214
|
-
|
|
215
|
-
if (isWhitespaceBlock(elm)) {
|
|
216
|
-
if (lastParent && elm.parentElement === lastParent) {
|
|
217
|
-
emptyBlockCount++
|
|
218
|
-
if (emptyBlockCount > 1) {
|
|
219
|
-
nodesToRemove.push(elm)
|
|
220
|
-
}
|
|
221
|
-
} else {
|
|
222
|
-
// Different parent, reset counter
|
|
223
|
-
emptyBlockCount = 1
|
|
224
|
-
}
|
|
225
|
-
|
|
226
|
-
lastParent = elm.parentElement
|
|
227
|
-
} else {
|
|
228
|
-
// Recurse into child nodes
|
|
229
|
-
normalizeWhitespace(child)
|
|
230
|
-
// Reset counter for siblings
|
|
231
|
-
emptyBlockCount = 0
|
|
232
|
-
}
|
|
233
|
-
}
|
|
234
|
-
|
|
235
|
-
// Remove marked nodes
|
|
236
|
-
nodesToRemove.forEach((node) => {
|
|
237
|
-
node.parentElement?.removeChild(node)
|
|
238
|
-
})
|
|
239
|
-
}
|
|
240
|
-
|
|
241
|
-
/**
|
|
242
|
-
* Helper to remove all whitespace nodes
|
|
243
|
-
* @param node - Root node to process
|
|
244
|
-
*/
|
|
245
|
-
export function removeAllWhitespace(rootNode: Node) {
|
|
246
|
-
const nodesToRemove: Node[] = []
|
|
247
|
-
|
|
248
|
-
function collectNodesToRemove(currentNode: Node) {
|
|
249
|
-
if (isElement(currentNode)) {
|
|
250
|
-
const elm = currentNode as HTMLElement
|
|
251
|
-
|
|
252
|
-
// Handle <br> tags that is between <p> tags
|
|
253
|
-
if (
|
|
254
|
-
tagName(elm) === 'br' &&
|
|
255
|
-
(tagName(elm.nextElementSibling) === 'p' ||
|
|
256
|
-
tagName(elm.previousElementSibling) === 'p')
|
|
257
|
-
) {
|
|
258
|
-
nodesToRemove.push(elm)
|
|
259
|
-
|
|
260
|
-
return
|
|
261
|
-
}
|
|
262
|
-
|
|
263
|
-
// Handle empty blocks
|
|
264
|
-
if (
|
|
265
|
-
(tagName(elm) === 'p' || tagName(elm) === 'br') &&
|
|
266
|
-
elm?.firstChild?.textContent?.trim() === ''
|
|
267
|
-
) {
|
|
268
|
-
nodesToRemove.push(elm)
|
|
269
|
-
|
|
270
|
-
return
|
|
271
|
-
}
|
|
272
|
-
|
|
273
|
-
// Recursively process child nodes
|
|
274
|
-
for (let child = elm.firstChild; child; child = child.nextSibling) {
|
|
275
|
-
collectNodesToRemove(child)
|
|
276
|
-
}
|
|
277
|
-
}
|
|
278
|
-
}
|
|
279
|
-
|
|
280
|
-
collectNodesToRemove(rootNode)
|
|
281
|
-
|
|
282
|
-
// Remove the collected nodes
|
|
283
|
-
nodesToRemove.forEach((node) => {
|
|
284
|
-
node.parentElement?.removeChild(node)
|
|
285
|
-
})
|
|
286
|
-
}
|
|
287
|
-
|
|
288
|
-
function isWhitespaceBlock(elm: HTMLElement): boolean {
|
|
289
|
-
return ['p', 'br'].includes(tagName(elm) || '') && !elm.textContent?.trim()
|
|
290
|
-
}
|
|
@@ -10,8 +10,6 @@ import type {
|
|
|
10
10
|
ArbitraryTypedObject,
|
|
11
11
|
DeserializerRule,
|
|
12
12
|
HtmlDeserializerOptions,
|
|
13
|
-
HtmlParser,
|
|
14
|
-
HtmlPreprocessorOptions,
|
|
15
13
|
PlaceholderAnnotation,
|
|
16
14
|
PlaceholderDecorator,
|
|
17
15
|
TypedObject,
|
|
@@ -28,10 +26,10 @@ import {
|
|
|
28
26
|
isPlaceholderAnnotation,
|
|
29
27
|
isPlaceholderDecorator,
|
|
30
28
|
tagName,
|
|
31
|
-
trimWhitespace,
|
|
32
29
|
} from './helpers'
|
|
33
|
-
import preprocessors from './preprocessors'
|
|
30
|
+
import {preprocessors} from './preprocessors'
|
|
34
31
|
import {createRules} from './rules'
|
|
32
|
+
import {trimWhitespace} from './trim-whitespace'
|
|
35
33
|
|
|
36
34
|
/**
|
|
37
35
|
* HTML Deserializer
|
|
@@ -42,6 +40,7 @@ export default class HtmlDeserializer {
|
|
|
42
40
|
schema: Schema
|
|
43
41
|
rules: DeserializerRule[]
|
|
44
42
|
parseHtml: (html: string) => HTMLElement
|
|
43
|
+
whitespaceMode: 'preserve' | 'remove' | 'normalize'
|
|
45
44
|
_markDefs: PortableTextObject[] = []
|
|
46
45
|
|
|
47
46
|
/**
|
|
@@ -59,9 +58,16 @@ export default class HtmlDeserializer {
|
|
|
59
58
|
this.schema = schema
|
|
60
59
|
this.keyGenerator = options.keyGenerator ?? keyGenerator
|
|
61
60
|
this.rules = [...rules, ...standardRules]
|
|
61
|
+
this.whitespaceMode = unstable_whitespaceOnPasteMode
|
|
62
62
|
const parseHtml = options.parseHtml || defaultParseHtml()
|
|
63
63
|
this.parseHtml = (html) => {
|
|
64
|
-
const
|
|
64
|
+
const cleanHTML = vercelStegaClean(html)
|
|
65
|
+
const doc = parseHtml(cleanHTML)
|
|
66
|
+
|
|
67
|
+
for (const processor of preprocessors) {
|
|
68
|
+
processor(cleanHTML, doc)
|
|
69
|
+
}
|
|
70
|
+
|
|
65
71
|
return doc.body
|
|
66
72
|
}
|
|
67
73
|
}
|
|
@@ -77,9 +83,10 @@ export default class HtmlDeserializer {
|
|
|
77
83
|
const {parseHtml} = this
|
|
78
84
|
const fragment = parseHtml(html)
|
|
79
85
|
const children = Array.from(fragment.childNodes) as HTMLElement[]
|
|
80
|
-
|
|
86
|
+
|
|
81
87
|
const blocks = trimWhitespace(
|
|
82
|
-
this.schema,
|
|
88
|
+
{schema: this.schema},
|
|
89
|
+
this.whitespaceMode,
|
|
83
90
|
flattenNestedBlocks(
|
|
84
91
|
{schema: this.schema},
|
|
85
92
|
ensureRootIsBlocks(
|
|
@@ -306,21 +313,3 @@ export default class HtmlDeserializer {
|
|
|
306
313
|
}, [] as TypedObject[])
|
|
307
314
|
}
|
|
308
315
|
}
|
|
309
|
-
|
|
310
|
-
// TODO: make this plugin-style
|
|
311
|
-
function preprocess(
|
|
312
|
-
html: string,
|
|
313
|
-
parseHtml: HtmlParser,
|
|
314
|
-
options: HtmlPreprocessorOptions,
|
|
315
|
-
): Document {
|
|
316
|
-
const cleanHTML = vercelStegaClean(html)
|
|
317
|
-
const doc = parseHtml(normalizeHtmlBeforePreprocess(cleanHTML))
|
|
318
|
-
preprocessors.forEach((processor) => {
|
|
319
|
-
processor(cleanHTML, doc, options)
|
|
320
|
-
})
|
|
321
|
-
return doc
|
|
322
|
-
}
|
|
323
|
-
|
|
324
|
-
function normalizeHtmlBeforePreprocess(html: string): string {
|
|
325
|
-
return html.trim()
|
|
326
|
-
}
|
|
@@ -1,13 +1,15 @@
|
|
|
1
|
-
import
|
|
2
|
-
import
|
|
3
|
-
import
|
|
4
|
-
import
|
|
5
|
-
import
|
|
1
|
+
import {preprocessWordOnline} from '../word-online/preprocessor.word-online'
|
|
2
|
+
import {preprocessGDocs} from './preprocessor.gdocs'
|
|
3
|
+
import {preprocessHTML} from './preprocessor.html'
|
|
4
|
+
import {preprocessNotion} from './preprocessor.notion'
|
|
5
|
+
import {preprocessWhitespace} from './preprocessor.whitespace'
|
|
6
|
+
import {preprocessWord} from './preprocessor.word'
|
|
6
7
|
|
|
7
|
-
export
|
|
8
|
+
export const preprocessors = [
|
|
8
9
|
preprocessWhitespace,
|
|
9
10
|
preprocessNotion,
|
|
10
11
|
preprocessWord,
|
|
12
|
+
preprocessWordOnline,
|
|
11
13
|
preprocessGDocs,
|
|
12
14
|
preprocessHTML,
|
|
13
15
|
]
|
|
@@ -1,14 +1,7 @@
|
|
|
1
|
-
import
|
|
2
|
-
import {normalizeWhitespace, removeAllWhitespace, tagName} from '../helpers'
|
|
1
|
+
import {tagName} from '../helpers'
|
|
3
2
|
import {_XPathResult} from './xpathResult'
|
|
4
3
|
|
|
5
|
-
export
|
|
6
|
-
_html: string,
|
|
7
|
-
doc: Document,
|
|
8
|
-
options: HtmlPreprocessorOptions,
|
|
9
|
-
): Document => {
|
|
10
|
-
const whitespaceOnPasteMode =
|
|
11
|
-
options?.unstable_whitespaceOnPasteMode || 'preserve'
|
|
4
|
+
export function preprocessGDocs(_html: string, doc: Document): Document {
|
|
12
5
|
let gDocsRootOrSiblingNode = doc
|
|
13
6
|
.evaluate(
|
|
14
7
|
'//*[@id and contains(@id, "docs-internal-guid")]',
|
|
@@ -27,19 +20,6 @@ export default (
|
|
|
27
20
|
gDocsRootOrSiblingNode = doc.body
|
|
28
21
|
}
|
|
29
22
|
|
|
30
|
-
switch (whitespaceOnPasteMode) {
|
|
31
|
-
case 'normalize':
|
|
32
|
-
// Keep only 1 empty block between content nodes
|
|
33
|
-
normalizeWhitespace(gDocsRootOrSiblingNode)
|
|
34
|
-
break
|
|
35
|
-
case 'remove':
|
|
36
|
-
// Remove all whitespace nodes
|
|
37
|
-
removeAllWhitespace(gDocsRootOrSiblingNode)
|
|
38
|
-
break
|
|
39
|
-
default:
|
|
40
|
-
break
|
|
41
|
-
}
|
|
42
|
-
|
|
43
23
|
// Tag every child with attribute 'is-google-docs' so that the GDocs rule-set can
|
|
44
24
|
// work exclusivly on these children
|
|
45
25
|
const childNodes = doc.evaluate(
|
|
@@ -15,7 +15,7 @@ const unwantedWordDocumentPaths = [
|
|
|
15
15
|
'//link',
|
|
16
16
|
]
|
|
17
17
|
|
|
18
|
-
export
|
|
18
|
+
export function preprocessHTML(_html: string, doc: Document): Document {
|
|
19
19
|
// Make sure text directly on the body is wrapped in spans.
|
|
20
20
|
// This mimics what the browser does before putting html on the clipboard,
|
|
21
21
|
// when used in a script context with JSDOM
|
|
@@ -1,7 +1,19 @@
|
|
|
1
1
|
import {PRESERVE_WHITESPACE_TAGS} from '../../constants'
|
|
2
2
|
import {_XPathResult} from './xpathResult'
|
|
3
3
|
|
|
4
|
-
|
|
4
|
+
// Elements that only contain block-level children (not inline text content)
|
|
5
|
+
const BLOCK_CONTAINER_ELEMENTS = [
|
|
6
|
+
'body',
|
|
7
|
+
'table',
|
|
8
|
+
'tbody',
|
|
9
|
+
'thead',
|
|
10
|
+
'tfoot',
|
|
11
|
+
'tr',
|
|
12
|
+
'ul',
|
|
13
|
+
'ol',
|
|
14
|
+
]
|
|
15
|
+
|
|
16
|
+
export function preprocessWhitespace(_: string, doc: Document): Document {
|
|
5
17
|
// Recursively process all nodes.
|
|
6
18
|
function processNode(node: Node) {
|
|
7
19
|
// If this is a text node and not inside a tag where whitespace should be preserved, process it.
|
|
@@ -11,14 +23,27 @@ export default (_: string, doc: Document): Document => {
|
|
|
11
23
|
node.parentElement?.tagName.toLowerCase() || '',
|
|
12
24
|
)
|
|
13
25
|
) {
|
|
14
|
-
|
|
26
|
+
const normalized =
|
|
15
27
|
node.textContent
|
|
16
28
|
?.replace(/\s\s+/g, ' ') // Remove multiple whitespace
|
|
17
29
|
.replace(/[\r\n]+/g, ' ') || '' // Replace newlines with spaces
|
|
30
|
+
const parentTag = node.parentElement?.tagName.toLowerCase()
|
|
31
|
+
|
|
32
|
+
if (
|
|
33
|
+
parentTag &&
|
|
34
|
+
BLOCK_CONTAINER_ELEMENTS.includes(parentTag) &&
|
|
35
|
+
normalized.trim() === ''
|
|
36
|
+
) {
|
|
37
|
+
// If parent is a block container and text is only whitespace, remove it
|
|
38
|
+
node.parentNode?.removeChild(node)
|
|
39
|
+
} else {
|
|
40
|
+
node.textContent = normalized
|
|
41
|
+
}
|
|
18
42
|
}
|
|
19
43
|
// Otherwise, if this node has children, process them.
|
|
20
44
|
else {
|
|
21
|
-
|
|
45
|
+
// Process children in reverse to handle removals safely
|
|
46
|
+
for (let i = node.childNodes.length - 1; i >= 0; i--) {
|
|
22
47
|
processNode(node.childNodes[i])
|
|
23
48
|
}
|
|
24
49
|
}
|
|
@@ -35,7 +35,7 @@ function isWordHtml(html: string) {
|
|
|
35
35
|
return WORD_HTML_REGEX.test(html)
|
|
36
36
|
}
|
|
37
37
|
|
|
38
|
-
export
|
|
38
|
+
export function preprocessWord(html: string, doc: Document): Document {
|
|
39
39
|
if (!isWordHtml(html)) {
|
|
40
40
|
return doc
|
|
41
41
|
}
|
|
@@ -1,10 +1,11 @@
|
|
|
1
1
|
import type {Schema} from '@portabletext/schema'
|
|
2
2
|
import type {SchemaMatchers} from '../../schema-matchers'
|
|
3
3
|
import type {DeserializerRule} from '../../types'
|
|
4
|
-
import
|
|
5
|
-
import
|
|
6
|
-
import
|
|
7
|
-
import
|
|
4
|
+
import {createWordOnlineRules} from '../word-online/rules.word-online'
|
|
5
|
+
import {createGDocsRules} from './rules.gdocs'
|
|
6
|
+
import {createHTMLRules} from './rules.html'
|
|
7
|
+
import {createNotionRules} from './rules.notion'
|
|
8
|
+
import {createWordRules} from './rules.word'
|
|
8
9
|
|
|
9
10
|
export function createRules(
|
|
10
11
|
schema: Schema,
|
|
@@ -12,6 +13,7 @@ export function createRules(
|
|
|
12
13
|
): DeserializerRule[] {
|
|
13
14
|
return [
|
|
14
15
|
...createWordRules(),
|
|
16
|
+
...createWordOnlineRules(schema, options),
|
|
15
17
|
...createNotionRules(),
|
|
16
18
|
...createGDocsRules(schema),
|
|
17
19
|
...createHTMLRules(schema, options),
|
|
@@ -93,7 +93,7 @@ function getBlockStyle(schema: Schema, el: Node): string {
|
|
|
93
93
|
return block.style
|
|
94
94
|
}
|
|
95
95
|
|
|
96
|
-
export
|
|
96
|
+
export function createGDocsRules(schema: Schema): DeserializerRule[] {
|
|
97
97
|
return [
|
|
98
98
|
{
|
|
99
99
|
deserialize(el, next) {
|
|
@@ -14,9 +14,9 @@ import type {SchemaMatchers} from '../../schema-matchers'
|
|
|
14
14
|
import type {DeserializerRule} from '../../types'
|
|
15
15
|
import {keyGenerator} from '../../util/randomKey'
|
|
16
16
|
import {isElement, tagName} from '../helpers'
|
|
17
|
-
import {whitespaceTextNodeRule} from './whitespace-text-node'
|
|
17
|
+
import {whitespaceTextNodeRule} from './rules.whitespace-text-node'
|
|
18
18
|
|
|
19
|
-
|
|
19
|
+
function resolveListItem(
|
|
20
20
|
schema: Schema,
|
|
21
21
|
listNodeTagName: string,
|
|
22
22
|
): string | undefined {
|
|
@@ -35,7 +35,7 @@ export function resolveListItem(
|
|
|
35
35
|
return undefined
|
|
36
36
|
}
|
|
37
37
|
|
|
38
|
-
export
|
|
38
|
+
export function createHTMLRules(
|
|
39
39
|
schema: Schema,
|
|
40
40
|
options: {keyGenerator?: () => string; matchers?: SchemaMatchers},
|
|
41
41
|
): DeserializerRule[] {
|
|
@@ -27,7 +27,7 @@ function isNotion(el: Node): boolean {
|
|
|
27
27
|
return isElement(el) && Boolean(el.getAttribute('data-is-notion'))
|
|
28
28
|
}
|
|
29
29
|
|
|
30
|
-
export
|
|
30
|
+
export function createNotionRules(): DeserializerRule[] {
|
|
31
31
|
return [
|
|
32
32
|
{
|
|
33
33
|
deserialize(el) {
|
|
@@ -0,0 +1,95 @@
|
|
|
1
|
+
import {
|
|
2
|
+
BLOCK_DEFAULT_STYLE,
|
|
3
|
+
DEFAULT_BLOCK,
|
|
4
|
+
HTML_HEADER_TAGS,
|
|
5
|
+
} from '../../constants'
|
|
6
|
+
import type {DeserializerRule} from '../../types'
|
|
7
|
+
import {isElement, tagName} from '../helpers'
|
|
8
|
+
|
|
9
|
+
function getListItemStyle(el: Node): string | undefined {
|
|
10
|
+
const style = isElement(el) && el.getAttribute('style')
|
|
11
|
+
if (!style) {
|
|
12
|
+
return undefined
|
|
13
|
+
}
|
|
14
|
+
|
|
15
|
+
if (!style.match(/lfo\d+/)) {
|
|
16
|
+
return undefined
|
|
17
|
+
}
|
|
18
|
+
|
|
19
|
+
return style.match('lfo1') ? 'number' : 'bullet'
|
|
20
|
+
}
|
|
21
|
+
|
|
22
|
+
function getListItemLevel(el: Node): number | undefined {
|
|
23
|
+
const style = isElement(el) && el.getAttribute('style')
|
|
24
|
+
if (!style) {
|
|
25
|
+
return undefined
|
|
26
|
+
}
|
|
27
|
+
|
|
28
|
+
const levelMatch = style.match(/level\d+/)
|
|
29
|
+
if (!levelMatch) {
|
|
30
|
+
return undefined
|
|
31
|
+
}
|
|
32
|
+
|
|
33
|
+
const [level] = levelMatch[0].match(/\d/) || []
|
|
34
|
+
const levelNum = level ? Number.parseInt(level, 10) : 1
|
|
35
|
+
return levelNum || 1
|
|
36
|
+
}
|
|
37
|
+
|
|
38
|
+
function isWordListElement(el: Node): boolean {
|
|
39
|
+
if (!isElement(el)) {
|
|
40
|
+
return false
|
|
41
|
+
}
|
|
42
|
+
|
|
43
|
+
// Check for specific class names
|
|
44
|
+
if (el.className) {
|
|
45
|
+
if (
|
|
46
|
+
el.className === 'MsoListParagraphCxSpFirst' ||
|
|
47
|
+
el.className === 'MsoListParagraphCxSpMiddle' ||
|
|
48
|
+
el.className === 'MsoListParagraphCxSpLast'
|
|
49
|
+
) {
|
|
50
|
+
return true
|
|
51
|
+
}
|
|
52
|
+
}
|
|
53
|
+
|
|
54
|
+
// Check for mso-list in style attribute
|
|
55
|
+
const style = el.getAttribute('style')
|
|
56
|
+
if (style && /mso-list:\s*l\d+\s+level\d+\s+lfo\d+/.test(style)) {
|
|
57
|
+
return true
|
|
58
|
+
}
|
|
59
|
+
|
|
60
|
+
return false
|
|
61
|
+
}
|
|
62
|
+
|
|
63
|
+
function getHeadingStyle(el: Node): string | undefined {
|
|
64
|
+
const tag = tagName(el)
|
|
65
|
+
if (tag && HTML_HEADER_TAGS[tag]) {
|
|
66
|
+
return HTML_HEADER_TAGS[tag]?.style
|
|
67
|
+
}
|
|
68
|
+
return undefined
|
|
69
|
+
}
|
|
70
|
+
|
|
71
|
+
export function createWordRules(): DeserializerRule[] {
|
|
72
|
+
return [
|
|
73
|
+
{
|
|
74
|
+
deserialize(el, next) {
|
|
75
|
+
const tag = tagName(el)
|
|
76
|
+
|
|
77
|
+
// Handle list items (both paragraphs and headings)
|
|
78
|
+
if (
|
|
79
|
+
(tag === 'p' || HTML_HEADER_TAGS[tag || '']) &&
|
|
80
|
+
isWordListElement(el)
|
|
81
|
+
) {
|
|
82
|
+
const headingStyle = getHeadingStyle(el)
|
|
83
|
+
return {
|
|
84
|
+
...DEFAULT_BLOCK,
|
|
85
|
+
listItem: getListItemStyle(el),
|
|
86
|
+
level: getListItemLevel(el),
|
|
87
|
+
style: headingStyle || BLOCK_DEFAULT_STYLE,
|
|
88
|
+
children: next(el.childNodes),
|
|
89
|
+
}
|
|
90
|
+
}
|
|
91
|
+
return undefined
|
|
92
|
+
},
|
|
93
|
+
},
|
|
94
|
+
]
|
|
95
|
+
}
|