@portabletext/block-tools 0.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +226 -0
- package/lib/index.cjs +1056 -0
- package/lib/index.cjs.map +1 -0
- package/lib/index.d.cts +172 -0
- package/lib/index.d.ts +172 -0
- package/lib/index.js +1056 -0
- package/lib/index.js.map +1 -0
- package/package.json +71 -0
- package/src/HtmlDeserializer/helpers.ts +363 -0
- package/src/HtmlDeserializer/index.ts +313 -0
- package/src/HtmlDeserializer/preprocessors/gdocs.ts +86 -0
- package/src/HtmlDeserializer/preprocessors/html.ts +57 -0
- package/src/HtmlDeserializer/preprocessors/index.ts +13 -0
- package/src/HtmlDeserializer/preprocessors/notion.ts +25 -0
- package/src/HtmlDeserializer/preprocessors/whitespace.ts +31 -0
- package/src/HtmlDeserializer/preprocessors/word.ts +92 -0
- package/src/HtmlDeserializer/preprocessors/xpathResult.ts +13 -0
- package/src/HtmlDeserializer/rules/gdocs.ts +183 -0
- package/src/HtmlDeserializer/rules/html.ts +264 -0
- package/src/HtmlDeserializer/rules/index.ts +18 -0
- package/src/HtmlDeserializer/rules/notion.ts +60 -0
- package/src/HtmlDeserializer/rules/word.ts +59 -0
- package/src/constants.ts +104 -0
- package/src/index.ts +52 -0
- package/src/types.ts +139 -0
- package/src/util/blockContentTypeFeatures.ts +141 -0
- package/src/util/findBlockType.ts +13 -0
- package/src/util/normalizeBlock.ts +142 -0
- package/src/util/randomKey.ts +26 -0
- package/src/util/resolveJsType.ts +44 -0
|
@@ -0,0 +1,183 @@
|
|
|
1
|
+
import type {ArraySchemaType} from '@sanity/types'
|
|
2
|
+
import {
|
|
3
|
+
BLOCK_DEFAULT_STYLE,
|
|
4
|
+
DEFAULT_BLOCK,
|
|
5
|
+
DEFAULT_SPAN,
|
|
6
|
+
HTML_BLOCK_TAGS,
|
|
7
|
+
HTML_HEADER_TAGS,
|
|
8
|
+
HTML_LIST_CONTAINER_TAGS,
|
|
9
|
+
} from '../../constants'
|
|
10
|
+
import type {BlockEnabledFeatures, DeserializerRule} from '../../types'
|
|
11
|
+
import {isElement, tagName} from '../helpers'
|
|
12
|
+
|
|
13
|
+
// Tag names of the HTML list containers, i.e. the keys of HTML_LIST_CONTAINER_TAGS.
const LIST_CONTAINER_TAGS = Object.keys(HTML_LIST_CONTAINER_TAGS)
|
|
14
|
+
|
|
15
|
+
/**
 * Tells whether a node declares `font-style: italic` in its inline `style`
 * attribute, which is the signal this rule set uses for emphasis.
 *
 * @param el - Node to inspect; non-element nodes never match.
 * @returns `true` when the inline style declares italic text.
 */
function isEmphasis(el: Node): boolean {
  const inlineStyle = (isElement(el) && el.getAttribute('style')) || ''
  return /font-style\s*:\s*italic/.test(inlineStyle)
}
|
|
20
|
+
|
|
21
|
+
/**
 * Tells whether a node declares `font-weight: 700` in its inline `style`
 * attribute, which is the signal this rule set uses for bold text.
 *
 * @param el - Node to inspect; non-element nodes never match.
 * @returns `true` when the inline style declares a 700 font weight.
 */
function isStrong(el: Node): boolean {
  const inlineStyle = (isElement(el) && el.getAttribute('style')) || ''
  return /font-weight\s*:\s*700/.test(inlineStyle)
}
|
|
26
|
+
|
|
27
|
+
/**
 * Tells whether a node declares `text-decoration: underline` in its inline
 * `style` attribute.
 *
 * Elements whose direct parent is an `<a>` never count as underlined
 * (presumably because link text is underlined by default - confirm).
 *
 * @param el - Node to inspect; non-element nodes never match.
 * @returns `true` for an underlined element that is not directly inside a link.
 */
function isUnderline(el: Node): boolean {
  if (isElement(el) && tagName(el.parentNode) !== 'a') {
    return /text-decoration\s*:\s*underline/.test(
      el.getAttribute('style') || '',
    )
  }
  return false
}
|
|
37
|
+
|
|
38
|
+
/**
 * Tells whether a node declares a `text-decoration` containing `line-through`
 * in its inline `style` attribute.
 *
 * The match is lenient about other keywords before or after `line-through`
 * (e.g. `text-decoration: underline line-through`), but it stays inside the
 * `text-decoration` declaration: `[^;]*` cannot run past the `;` that ends the
 * declaration, and no trailing `;` is required, so a declaration that closes
 * the style attribute without a semicolon is recognized as well.
 *
 * @param el - Node to inspect; non-element nodes never match.
 * @returns `true` when the element's text-decoration includes line-through.
 */
function isStrikethrough(el: Node): boolean {
  const style = isElement(el) && el.getAttribute('style')
  return /text-decoration\s*:[^;]*line-through/.test(style || '')
}
|
|
44
|
+
|
|
45
|
+
/**
 * Checks for the `data-is-google-docs` marker attribute given by the gdocs
 * preprocessor.
 *
 * @param el - Node to inspect; non-element nodes never match.
 * @returns `true` when the attribute is present with a non-empty value.
 */
function isGoogleDocs(el: Node): boolean {
  if (!isElement(el)) {
    return false
  }
  return Boolean(el.getAttribute('data-is-google-docs'))
}
|
|
49
|
+
|
|
50
|
+
/**
 * Checks for the `data-is-root-node` marker attribute on an element.
 *
 * @param el - Node to inspect; non-element nodes never match.
 * @returns `true` when the attribute is present with a non-empty value.
 */
function isRootNode(el: Node): boolean {
  if (!isElement(el)) {
    return false
  }
  return Boolean(el.getAttribute('data-is-root-node'))
}
|
|
53
|
+
|
|
54
|
+
/**
 * Derives the list type of a list item from its parent list container.
 *
 * @param el - The list item node.
 * @returns `'bullet'` when the parent is a `<ul>`, `'number'` for any other
 *   list container, and `undefined` when the parent is not a list container.
 *   A parent for which `tagName` yields no tag (e.g. a detached item) also
 *   gives `undefined` instead of being reported as a numbered list.
 */
function getListItemStyle(el: Node): 'bullet' | 'number' | undefined {
  const parentTag = tagName(el.parentNode)
  if (!parentTag || !LIST_CONTAINER_TAGS.includes(parentTag)) {
    return undefined
  }
  return parentTag === 'ul' ? 'bullet' : 'number'
}
|
|
61
|
+
|
|
62
|
+
/**
 * Computes the nesting level of a list item.
 *
 * For an `<li>` the level is the number of list-container ancestors found
 * while walking up the whole parent chain (so an `<li>` outside any list
 * yields 0). Any other node is reported as level 1.
 *
 * @param el - Node to measure.
 * @returns The nesting level.
 */
function getListItemLevel(el: Node): number {
  if (tagName(el) !== 'li') {
    return 1
  }
  let depth = 0
  for (
    let ancestor = el.parentNode;
    ancestor;
    ancestor = ancestor.parentNode
  ) {
    const ancestorTag = tagName(ancestor)
    if (ancestorTag && LIST_CONTAINER_TAGS.includes(ancestorTag)) {
      depth += 1
    }
  }
  return depth
}
|
|
78
|
+
|
|
79
|
+
// Lookup from tag name to block template, covering the plain block tags and
// the header tags. Header entries win on a key clash because they are merged last.
const blocks: Record<string, {style: string} | undefined> = Object.assign(
  {},
  HTML_BLOCK_TAGS,
  HTML_HEADER_TAGS,
)
|
|
83
|
+
|
|
84
|
+
/**
 * Picks the block style for a node from the tag of its first child.
 *
 * @param el - Node whose first child decides the style.
 * @param enabledBlockStyles - Styles that may be returned.
 * @returns The style mapped to the first child's tag when that tag is a known
 *   block/header tag and its style is enabled; otherwise `BLOCK_DEFAULT_STYLE`.
 */
function getBlockStyle(el: Node, enabledBlockStyles: string[]): string {
  const firstChildTag = tagName(el.firstChild)
  const template = firstChildTag ? blocks[firstChildTag] : undefined
  return template && enabledBlockStyles.includes(template.style)
    ? template.style
    : BLOCK_DEFAULT_STYLE
}
|
|
95
|
+
|
|
96
|
+
/**
 * Builds the deserializer rules for nodes carrying the Google Docs marker
 * attribute (see `isGoogleDocs`).
 *
 * Every rule returns `undefined` for nodes it does not handle.
 *
 * @param _blockContentType - Unused.
 * @param options - Enabled features; only `enabledBlockStyles` is read here.
 * @returns Three rules: styled spans, list items and line breaks.
 */
export default function createGDocsRules(
  _blockContentType: ArraySchemaType,
  options: BlockEnabledFeatures,
): DeserializerRule[] {
  // <span>: the text content plus decorators inferred from the inline style.
  const spanRule: DeserializerRule = {
    deserialize(el) {
      if (!isElement(el) || tagName(el) !== 'span' || !isGoogleDocs(el)) {
        return undefined
      }
      // The order of this table fixes the order of the resulting marks.
      const detectors: Array<[string, (node: Node) => boolean]> = [
        ['strong', isStrong],
        ['underline', isUnderline],
        ['strike-through', isStrikethrough],
        ['em', isEmphasis],
      ]
      const marks: string[] = []
      for (const [mark, matches] of detectors) {
        if (matches(el)) {
          marks.push(mark)
        }
      }
      return {
        ...DEFAULT_SPAN,
        marks,
        text: el.textContent,
      }
    },
  }

  // <li>: a block carrying list type, nesting level and style. The children
  // are taken from the child nodes of the item's first child.
  const listItemRule: DeserializerRule = {
    deserialize(el, next) {
      if (tagName(el) !== 'li' || !isGoogleDocs(el)) {
        return undefined
      }
      return {
        ...DEFAULT_BLOCK,
        listItem: getListItemStyle(el),
        level: getListItemLevel(el),
        style: getBlockStyle(el, options.enabledBlockStyles),
        children: next(el.firstChild?.childNodes || []),
      }
    },
  }

  // <br>: becomes an empty span when it is an `apple-interchange-newline`,
  // sits inside a parent with no text content (an empty paragraph), or is
  // flagged as a root node.
  const lineBreakRule: DeserializerRule = {
    deserialize(el) {
      if (tagName(el) !== 'br' || !isGoogleDocs(el) || !isElement(el)) {
        return undefined
      }
      const becomesEmptySpan =
        el.classList.contains('apple-interchange-newline') ||
        el?.parentNode?.textContent === '' ||
        isRootNode(el)
      if (!becomesEmptySpan) {
        return undefined
      }
      return {
        ...DEFAULT_SPAN,
        text: '',
      }
    },
  }

  return [spanRule, listItemRule, lineBreakRule]
}
|
|
@@ -0,0 +1,264 @@
|
|
|
1
|
+
import type {ArraySchemaType, TypedObject} from '@sanity/types'
|
|
2
|
+
import {
|
|
3
|
+
DEFAULT_BLOCK,
|
|
4
|
+
DEFAULT_SPAN,
|
|
5
|
+
HTML_BLOCK_TAGS,
|
|
6
|
+
HTML_DECORATOR_TAGS,
|
|
7
|
+
HTML_HEADER_TAGS,
|
|
8
|
+
HTML_LIST_CONTAINER_TAGS,
|
|
9
|
+
HTML_LIST_ITEM_TAGS,
|
|
10
|
+
HTML_SPAN_TAGS,
|
|
11
|
+
type PartialBlock,
|
|
12
|
+
} from '../../constants'
|
|
13
|
+
import type {BlockEnabledFeatures, DeserializerRule} from '../../types'
|
|
14
|
+
import {randomKey} from '../../util/randomKey'
|
|
15
|
+
import {isElement, tagName} from '../helpers'
|
|
16
|
+
|
|
17
|
+
/**
 * Maps a list container tag to its list type, provided that type is enabled.
 *
 * @param listNodeTagName - Tag name of the list container (`'ul'` or `'ol'`;
 *   the comparison is case-sensitive).
 * @param enabledListTypes - List types that may be returned.
 * @returns `'bullet'` for `ul`, `'number'` for `ol`, or `undefined` when the
 *   tag is neither or the corresponding type is not enabled.
 */
export function resolveListItem(
  listNodeTagName: string,
  enabledListTypes: string[],
): string | undefined {
  let listType: string
  switch (listNodeTagName) {
    case 'ul':
      listType = 'bullet'
      break
    case 'ol':
      listType = 'number'
      break
    default:
      return undefined
  }
  return enabledListTypes.includes(listType) ? listType : undefined
}
|
|
29
|
+
|
|
30
|
+
/**
 * Builds the generic HTML deserializer rules.
 *
 * Every rule returns `undefined` for nodes it does not handle. The rules are
 * returned in this order: text nodes, `<pre>`, `<blockquote>`, block elements,
 * `<span>`, `<div>`, list containers, `<br>`, list items, decorators, links.
 *
 * @param _blockContentType - Unused.
 * @param options - Enabled block styles, list types, decorators and
 *   annotations; features that are not enabled fall back to plain output.
 * @returns The list of rules.
 */
export default function createHTMLRules(
  _blockContentType: ArraySchemaType,
  options: BlockEnabledFeatures,
): DeserializerRule[] {
  // Tag name -> block template for the plain block tags and the header tags.
  // Built once per rule set rather than once per visited node.
  const blockTemplates: Record<string, PartialBlock | undefined> = {
    ...HTML_BLOCK_TAGS,
    ...HTML_HEADER_TAGS,
  }
  // Block-level tags that get flattened into a span when they appear directly
  // inside a blockquote (every block tag except blockquote itself).
  const flattenedInBlockquote = new Set(
    Object.keys(blockTemplates).filter((tag) => tag !== 'blockquote'),
  )

  return [
    // Text nodes
    {
      deserialize(el) {
        if (tagName(el) === 'pre') {
          return undefined
        }
        // A text node that collapses to a single space and sits between two
        // non-text siblings is kept as meaningful whitespace.
        const isValidWhiteSpace =
          el.nodeType === 3 &&
          (el.textContent || '')
            .replace(/[\r\n]/g, ' ')
            .replace(/\s\s+/g, ' ') === ' ' &&
          el.nextSibling &&
          el.nextSibling.nodeType !== 3 &&
          el.previousSibling &&
          el.previousSibling.nodeType !== 3
        // Any other lone-space text is dropped, as is text directly under <body>.
        const isValidText =
          (isValidWhiteSpace || el.textContent !== ' ') &&
          tagName(el.parentNode) !== 'body'
        if (el.nodeName === '#text' && isValidText) {
          return {
            ...DEFAULT_SPAN,
            marks: [],
            // Runs of whitespace are collapsed into one space
            text: (el.textContent || '').replace(/\s\s+/g, ' '),
          }
        }
        return undefined
      },
    }, // Pre element
    {
      deserialize(el) {
        if (tagName(el) !== 'pre') {
          return undefined
        }

        const isCodeEnabled = options.enabledBlockStyles.includes('code')

        // The whole text content goes into a single span of a normal block;
        // it is marked as code only when 'code' is among the enabled block styles.
        return {
          _type: 'block',
          style: 'normal',
          markDefs: [],
          children: [
            {
              ...DEFAULT_SPAN,
              marks: isCodeEnabled ? ['code'] : [],
              text: el.textContent || '',
            },
          ],
        }
      },
    }, // Blockquote element
    {
      deserialize(el, next) {
        if (tagName(el) !== 'blockquote') {
          return undefined
        }

        const children: HTMLElement[] = []
        el.childNodes.forEach((node, index) => {
          const isNestedBlock =
            node.nodeType === 1 &&
            flattenedInBlockquote.has((node as Element).localName.toLowerCase())
          if (!isNestedBlock) {
            children.push(node as HTMLElement)
            return
          }
          if (!el.ownerDocument) {
            // Without a document no span can be created; the child is skipped
            return
          }

          // Replace the nested block with a span holding clones of its
          // children, delimited by '\r' text nodes.
          const span = el.ownerDocument.createElement('span')
          span.appendChild(el.ownerDocument.createTextNode('\r'))
          node.childNodes.forEach((cn) => {
            span.appendChild(cn.cloneNode(true))
          })
          // NOTE(review): `index` never equals `childNodes.length`, so the
          // trailing '\r' is always appended. This looks like it was meant to
          // be `length - 1` (skip it after the last child) - kept as is because
          // the effect on the final output is not visible from here; confirm.
          if (index !== el.childNodes.length) {
            span.appendChild(el.ownerDocument.createTextNode('\r'))
          }
          children.push(span)
        })

        return {
          _type: 'block',
          style: 'blockquote',
          markDefs: [],
          children: next(children),
        }
      },
    }, // Block elements
    {
      deserialize(el, next) {
        const tag = tagName(el)
        let block = tag ? blockTemplates[tag] : undefined
        if (!block) {
          return undefined
        }
        // Don't add blocks into list items
        if (el.parentNode && tagName(el.parentNode) === 'li') {
          return next(el.childNodes)
        }
        // If style is not supported, return a defaultBlockType
        if (!options.enabledBlockStyles.includes(block.style)) {
          block = DEFAULT_BLOCK
        }
        return {
          ...block,
          children: next(el.childNodes),
        }
      },
    }, // Ignore span tags
    {
      deserialize(el, next) {
        const tag = tagName(el)
        if (!tag || !(tag in HTML_SPAN_TAGS)) {
          return undefined
        }
        return next(el.childNodes)
      },
    }, // Ignore div tags
    {
      deserialize(el, next) {
        if (tagName(el) !== 'div') {
          return undefined
        }
        return next(el.childNodes)
      },
    }, // Ignore list containers
    {
      deserialize(el, next) {
        const tag = tagName(el)
        if (!tag || !(tag in HTML_LIST_CONTAINER_TAGS)) {
          return undefined
        }
        return next(el.childNodes)
      },
    }, // Deal with br's
    {
      deserialize(el) {
        if (tagName(el) === 'br') {
          return {
            ...DEFAULT_SPAN,
            text: '\n',
          }
        }
        return undefined
      },
    }, // Deal with list items
    {
      deserialize(el, next, block) {
        const tag = tagName(el)
        const listItem = tag ? HTML_LIST_ITEM_TAGS[tag] : undefined
        const parentTag = tagName(el.parentNode) || ''
        // Only list items sitting directly inside a list container qualify
        if (
          !listItem ||
          !el.parentNode ||
          !HTML_LIST_CONTAINER_TAGS[parentTag]
        ) {
          return undefined
        }
        const enabledListItem = resolveListItem(
          parentTag,
          options.enabledListTypes,
        )
        // If the list item style is not supported, return a new default block
        if (!enabledListItem) {
          return block({_type: 'block', children: next(el.childNodes)})
        }
        // Override the list type on a copy: the template is a shared
        // module-level constant and must not be written to.
        return {
          ...listItem,
          listItem: enabledListItem,
          children: next(el.childNodes),
        }
      },
    }, // Deal with decorators - this is a limited set of known html elements that we know how to deserialize
    {
      deserialize(el, next) {
        const decorator = HTML_DECORATOR_TAGS[tagName(el) || '']
        if (!decorator || !options.enabledSpanDecorators.includes(decorator)) {
          return undefined
        }
        return {
          _type: '__decorator',
          name: decorator,
          children: next(el.childNodes),
        }
      },
    }, // Special case for hyperlinks, add annotation (if allowed by schema),
    // If not supported just write out the link text and href in plain text.
    {
      deserialize(el, next) {
        if (tagName(el) !== 'a') {
          return undefined
        }
        const linkEnabled = options.enabledBlockAnnotations.includes('link')
        const href = isElement(el) && el.getAttribute('href')
        if (!href) {
          // An anchor without href contributes only its children
          return next(el.childNodes)
        }
        if (linkEnabled) {
          const markDef: TypedObject = {
            _key: randomKey(12),
            _type: 'link',
            href: href,
          }
          return {
            _type: '__annotation',
            markDef: markDef,
            children: next(el.childNodes),
          }
        }
        // Links are not enabled: append " (href)" as a text node to the anchor
        // itself (this modifies the DOM node passed in), then deserialize its
        // children as plain content.
        el.appendChild(el.ownerDocument.createTextNode(` (${href})`))
        return next(el.childNodes)
      },
    },
  ]
}
|
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
import type {ArraySchemaType} from '@sanity/types'
|
|
2
|
+
import type {BlockEnabledFeatures, DeserializerRule} from '../../types'
|
|
3
|
+
import createGDocsRules from './gdocs'
|
|
4
|
+
import createHTMLRules from './html'
|
|
5
|
+
import createNotionRules from './notion'
|
|
6
|
+
import createWordRules from './word'
|
|
7
|
+
|
|
8
|
+
/**
 * Assembles the complete rule list from the source-specific rule sets.
 *
 * The resulting order is Word, Notion, Google Docs, then generic HTML.
 *
 * @param blockContentType - Passed to the Notion, Google Docs and HTML rule factories.
 * @param options - Enabled features, passed to the Google Docs and HTML rule factories.
 * @returns All rules in one flat array.
 */
export function createRules(
  blockContentType: ArraySchemaType,
  options: BlockEnabledFeatures,
): DeserializerRule[] {
  const wordRules = createWordRules()
  const notionRules = createNotionRules(blockContentType)
  const gdocsRules = createGDocsRules(blockContentType, options)
  const htmlRules = createHTMLRules(blockContentType, options)
  return wordRules.concat(notionRules, gdocsRules, htmlRules)
}
|
|
@@ -0,0 +1,60 @@
|
|
|
1
|
+
import type {ArraySchemaType} from '@sanity/types'
|
|
2
|
+
import {DEFAULT_SPAN} from '../../constants'
|
|
3
|
+
import type {DeserializerRule} from '../../types'
|
|
4
|
+
import {isElement, tagName} from '../helpers'
|
|
5
|
+
|
|
6
|
+
/**
 * Tells whether a node declares `font-style: italic` in its inline `style`
 * attribute, which is the signal this rule set uses for emphasis.
 *
 * Whitespace around the colon is tolerated, so `font-style: italic` matches
 * the same way `font-style:italic` does.
 *
 * @param el - Node to inspect; non-element nodes never match.
 * @returns `true` when the inline style declares italic text.
 */
function isEmphasis(el: Node): boolean {
  const style = isElement(el) && el.getAttribute('style')
  return /font-style\s*:\s*italic/.test(style || '')
}
|
|
11
|
+
|
|
12
|
+
/**
 * Tells whether a node declares a `font-weight` of 700 or 600 in its inline
 * `style` attribute, which is the signal this rule set uses for bold text.
 *
 * Whitespace around the colon is tolerated, so `font-weight: 600` matches the
 * same way `font-weight:600` does.
 *
 * @param el - Node to inspect; non-element nodes never match.
 * @returns `true` when the inline style declares a 600 or 700 font weight.
 */
function isStrong(el: Node): boolean {
  const style = isElement(el) && el.getAttribute('style')
  return /font-weight\s*:\s*(?:600|700)/.test(style || '')
}
|
|
19
|
+
|
|
20
|
+
/**
 * Tells whether a node declares `text-decoration: underline` in its inline
 * `style` attribute.
 *
 * Whitespace around the colon is tolerated, so `text-decoration: underline`
 * matches the same way `text-decoration:underline` does.
 *
 * @param el - Node to inspect; non-element nodes never match.
 * @returns `true` when the inline style declares underlined text.
 */
function isUnderline(el: Node): boolean {
  const style = isElement(el) && el.getAttribute('style')
  return /text-decoration\s*:\s*underline/.test(style || '')
}
|
|
25
|
+
|
|
26
|
+
/**
 * Checks for the `data-is-notion` marker attribute given by the Notion
 * preprocessor.
 *
 * @param el - Node to inspect; non-element nodes never match.
 * @returns `true` when the attribute is present with a non-empty value.
 */
function isNotion(el: Node): boolean {
  if (!isElement(el)) {
    return false
  }
  return Boolean(el.getAttribute('data-is-notion'))
}
|
|
30
|
+
|
|
31
|
+
/**
 * Builds the deserializer rules for nodes carrying the Notion marker
 * attribute (see `isNotion`).
 *
 * @param _blockContentType - Unused.
 * @returns A single rule that converts styled `<span>` elements into spans.
 */
export default function createNotionRules(
  _blockContentType: ArraySchemaType,
): DeserializerRule[] {
  return [
    {
      deserialize(el) {
        // Notion normally exports semantic HTML, but a single copied block
        // comes with its formatting as inline styles. Only a limited set of
        // those styles is handled here.
        if (!isElement(el) || tagName(el) !== 'span' || !isNotion(el)) {
          return undefined
        }
        // Marks are collected in a fixed order: strong, underline, em
        const marks: string[] = []
        if (isStrong(el)) {
          marks.push('strong')
        }
        if (isUnderline(el)) {
          marks.push('underline')
        }
        if (isEmphasis(el)) {
          marks.push('em')
        }
        return {
          ...DEFAULT_SPAN,
          marks,
          text: el.textContent,
        }
      },
    },
  ]
}
|
|
@@ -0,0 +1,59 @@
|
|
|
1
|
+
import {BLOCK_DEFAULT_STYLE, DEFAULT_BLOCK} from '../../constants'
|
|
2
|
+
import type {DeserializerRule} from '../../types'
|
|
3
|
+
import {isElement, tagName} from '../helpers'
|
|
4
|
+
|
|
5
|
+
/**
 * Derives the list type of a Word list paragraph from the `lfoN` token in its
 * inline `style` attribute.
 *
 * The heuristic is: `lfo1` means bullet, any other number means numbered. The
 * number is compared as a whole, so `lfo10`, `lfo11`, ... are not mistaken
 * for `lfo1`.
 *
 * @param el - The paragraph node.
 * @returns `'bullet'` or `'number'`, or `undefined` when the node has no
 *   style attribute or the style contains no `lfoN` token.
 */
function getListItemStyle(el: Node): string | undefined {
  const style = isElement(el) && el.getAttribute('style')
  if (!style) {
    return undefined
  }

  const listOverride = /lfo(\d+)/.exec(style)
  if (!listOverride) {
    return undefined
  }

  return listOverride[1] === '1' ? 'bullet' : 'number'
}
|
|
17
|
+
|
|
18
|
+
/**
 * Reads the nesting level of a Word list paragraph from the `levelN` token in
 * its inline `style` attribute.
 *
 * All digits of the token are parsed, so `level10` yields 10 rather than 1.
 *
 * @param el - The paragraph node.
 * @returns The level (at least 1; `level0` is reported as 1), or `undefined`
 *   when the node has no style attribute or the style has no `levelN` token.
 */
function getListItemLevel(el: Node): number | undefined {
  const style = isElement(el) && el.getAttribute('style')
  if (!style) {
    return undefined
  }

  const levelMatch = /level(\d+)/.exec(style)
  if (!levelMatch) {
    return undefined
  }

  // `|| 1` turns a parsed 0 (or NaN) into the lowest valid level
  return Number.parseInt(levelMatch[1] ?? '', 10) || 1
}
|
|
33
|
+
|
|
34
|
+
/**
 * Tells whether a node is a Word list paragraph, judged by its class name.
 *
 * The class name must be exactly one of the three `MsoListParagraphCxSp*`
 * names; an element carrying additional classes does not match.
 *
 * @param el - Node to inspect; non-element nodes never match.
 * @returns `true` for a first, middle or last Word list paragraph.
 */
function isWordListElement(el: Node): boolean {
  if (!isElement(el) || !el.className) {
    return false
  }
  switch (el.className) {
    case 'MsoListParagraphCxSpFirst':
    case 'MsoListParagraphCxSpMiddle':
    case 'MsoListParagraphCxSpLast':
      return true
    default:
      return false
  }
}
|
|
41
|
+
|
|
42
|
+
/**
 * Builds the deserializer rules for Word HTML.
 *
 * @returns A single rule that turns Word list paragraphs (`<p>` elements with
 *   a `MsoListParagraphCxSp*` class) into list item blocks with the default
 *   block style.
 */
export default function createWordRules(): DeserializerRule[] {
  const listParagraphRule: DeserializerRule = {
    deserialize(el, next) {
      if (tagName(el) !== 'p' || !isWordListElement(el)) {
        return undefined
      }
      // List type and level both come from the paragraph's inline style
      return {
        ...DEFAULT_BLOCK,
        listItem: getListItemStyle(el),
        level: getListItemLevel(el),
        style: BLOCK_DEFAULT_STYLE,
        children: next(el.childNodes),
      }
    },
  }
  return [listParagraphRule]
}
|
package/src/constants.ts
ADDED
|
@@ -0,0 +1,104 @@
|
|
|
1
|
+
import {uniq} from 'lodash'
|
|
2
|
+
|
|
3
|
+
/**
 * Template for a block: the fields of a block apart from its children.
 */
export interface PartialBlock {
  /** Type name of the block, e.g. `'block'` */
  _type: string
  /** Mark definitions of the block; empty in every template in this file */
  markDefs: string[]
  /** Style name, e.g. `'normal'`, `'blockquote'` or `'h1'` */
  style: string
  /** Nesting level; set on list item templates only */
  level?: number
  /** List type, e.g. `'bullet'`; set on list item templates only */
  listItem?: string
}
|
|
10
|
+
|
|
11
|
+
// Presumably the tags inside which whitespace is left untouched - the code
// using this list is not part of this file; confirm against the consumers.
export const PRESERVE_WHITESPACE_TAGS = ['pre', 'textarea', 'code']

/** Style name used for blocks that have no more specific style. */
export const BLOCK_DEFAULT_STYLE = 'normal'

/**
 * Template for a plain block.
 *
 * The freeze is shallow: the `markDefs` array itself stays mutable and is
 * shared by every object spread from this template.
 */
export const DEFAULT_BLOCK: PartialBlock = Object.freeze({
  _type: 'block',
  markDefs: [],
  style: BLOCK_DEFAULT_STYLE,
})

/**
 * Template for a span without marks.
 *
 * The freeze is shallow: the `marks` array itself stays mutable and is shared
 * by every object spread from this template that does not set its own `marks`.
 */
export const DEFAULT_SPAN = Object.freeze({
  _type: 'span',
  marks: [] as string[],
})
|
|
25
|
+
|
|
26
|
+
// Copy of the default block template with the given style
function blockWithStyle(style: string): PartialBlock {
  return {...DEFAULT_BLOCK, style}
}

/** Block templates for the plain block-level tags. */
export const HTML_BLOCK_TAGS = {
  p: DEFAULT_BLOCK,
  blockquote: blockWithStyle('blockquote'),
}

/** Tags treated as spans. */
export const HTML_SPAN_TAGS = {
  span: {object: 'text'},
}

/** Tags that contain list items. */
export const HTML_LIST_CONTAINER_TAGS: Record<
  string,
  {object: null} | undefined
> = {
  ol: {object: null},
  ul: {object: null},
}

/** Block templates for the heading tags; the style equals the tag name. */
export const HTML_HEADER_TAGS: Record<string, PartialBlock | undefined> = {
  h1: blockWithStyle('h1'),
  h2: blockWithStyle('h2'),
  h3: blockWithStyle('h3'),
  h4: blockWithStyle('h4'),
  h5: blockWithStyle('h5'),
  h6: blockWithStyle('h6'),
}

/** Block templates for the remaining tags. */
export const HTML_MISC_TAGS = {
  br: blockWithStyle(BLOCK_DEFAULT_STYLE),
}

/** Decorator name for each inline formatting tag. */
export const HTML_DECORATOR_TAGS: Record<string, string | undefined> = {
  // Bold
  b: 'strong',
  strong: 'strong',

  // Italic
  i: 'em',
  em: 'em',

  // Underline and strike-through
  u: 'underline',
  s: 'strike-through',
  strike: 'strike-through',
  del: 'strike-through',

  // Decorators named after their tag
  code: 'code',
  sup: 'sup',
  sub: 'sub',
  ins: 'ins',
  mark: 'mark',
  small: 'small',
}

/** Block template for list items: a first-level bullet item by default. */
export const HTML_LIST_ITEM_TAGS: Record<string, PartialBlock | undefined> = {
  li: {
    ...blockWithStyle(BLOCK_DEFAULT_STYLE),
    level: 1,
    listItem: 'bullet',
  },
}

/**
 * All of the tag maps above except the decorators, merged into one lookup.
 * Maps merged later win on a key clash.
 */
export const ELEMENT_MAP = {
  ...HTML_BLOCK_TAGS,
  ...HTML_SPAN_TAGS,
  ...HTML_LIST_CONTAINER_TAGS,
  ...HTML_LIST_ITEM_TAGS,
  ...HTML_HEADER_TAGS,
  ...HTML_MISC_TAGS,
}
|
|
93
|
+
|
|
94
|
+
// The ELEMENT_MAP entries that are block templates, i.e. that carry a `style`
const styledElements = Object.values(ELEMENT_MAP).filter(
  (tag): tag is PartialBlock => 'style' in tag,
)

/** Every distinct style name used by the block templates in ELEMENT_MAP. */
export const DEFAULT_SUPPORTED_STYLES = uniq(
  styledElements.map((tag) => tag.style),
)

/** Every distinct decorator name in HTML_DECORATOR_TAGS. */
export const DEFAULT_SUPPORTED_DECORATORS = uniq(
  Object.values(HTML_DECORATOR_TAGS),
)

/** Annotation types supported by default. */
export const DEFAULT_SUPPORTED_ANNOTATIONS = ['link']
|