@portabletext/block-tools 4.1.8 → 4.1.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (34) hide show
  1. package/package.json +5 -6
  2. package/src/HtmlDeserializer/flatten-nested-blocks.test.ts +0 -248
  3. package/src/HtmlDeserializer/flatten-nested-blocks.ts +0 -173
  4. package/src/HtmlDeserializer/helpers.ts +0 -108
  5. package/src/HtmlDeserializer/index.ts +0 -315
  6. package/src/HtmlDeserializer/preprocessors/index.ts +0 -15
  7. package/src/HtmlDeserializer/preprocessors/preprocessor.gdocs.ts +0 -66
  8. package/src/HtmlDeserializer/preprocessors/preprocessor.html.ts +0 -57
  9. package/src/HtmlDeserializer/preprocessors/preprocessor.notion.ts +0 -25
  10. package/src/HtmlDeserializer/preprocessors/preprocessor.whitespace.ts +0 -56
  11. package/src/HtmlDeserializer/preprocessors/preprocessor.word.ts +0 -92
  12. package/src/HtmlDeserializer/preprocessors/xpathResult.ts +0 -13
  13. package/src/HtmlDeserializer/rules/index.ts +0 -21
  14. package/src/HtmlDeserializer/rules/rules.gdocs.ts +0 -188
  15. package/src/HtmlDeserializer/rules/rules.html.ts +0 -356
  16. package/src/HtmlDeserializer/rules/rules.notion.ts +0 -57
  17. package/src/HtmlDeserializer/rules/rules.whitespace-text-node.ts +0 -31
  18. package/src/HtmlDeserializer/rules/rules.word.ts +0 -95
  19. package/src/HtmlDeserializer/trim-whitespace.ts +0 -157
  20. package/src/HtmlDeserializer/word-online/asserters.word-online.ts +0 -153
  21. package/src/HtmlDeserializer/word-online/preprocessor.word-online.ts +0 -263
  22. package/src/HtmlDeserializer/word-online/rules.word-online.ts +0 -390
  23. package/src/constants.ts +0 -104
  24. package/src/index.ts +0 -49
  25. package/src/rules/_exports/index.ts +0 -1
  26. package/src/rules/flatten-tables.test.ts +0 -495
  27. package/src/rules/flatten-tables.ts +0 -216
  28. package/src/rules/index.ts +0 -1
  29. package/src/schema-matchers.ts +0 -41
  30. package/src/types.ts +0 -100
  31. package/src/util/findBlockType.ts +0 -13
  32. package/src/util/normalizeBlock.ts +0 -171
  33. package/src/util/randomKey.ts +0 -28
  34. package/src/util/resolveJsType.ts +0 -44
@@ -1,188 +0,0 @@
1
- import type {Schema} from '@portabletext/schema'
2
- import {
3
- BLOCK_DEFAULT_STYLE,
4
- DEFAULT_BLOCK,
5
- DEFAULT_SPAN,
6
- HTML_BLOCK_TAGS,
7
- HTML_HEADER_TAGS,
8
- HTML_LIST_CONTAINER_TAGS,
9
- } from '../../constants'
10
- import type {DeserializerRule} from '../../types'
11
- import {isElement, tagName} from '../helpers'
12
-
13
// Tag names of the list container elements, taken from the keys of
// HTML_LIST_CONTAINER_TAGS.
const LIST_CONTAINER_TAGS: string[] = Object.keys(HTML_LIST_CONTAINER_TAGS)
14
-
15
/**
 * Tells whether the node's inline `style` attribute contains
 * `font-style: italic` (whitespace around the colon is tolerated).
 * font-style:italic seems like the most important rule for italic /
 * emphasis in their html.
 *
 * @param el - Node to inspect; anything that is not an element yields `false`.
 * @returns `true` when the inline style declares italic text.
 */
function isEmphasis(el: Node): boolean {
  if (!isElement(el)) {
    return false
  }
  const inlineStyle = el.getAttribute('style') || ''
  return /font-style\s*:\s*italic/.test(inlineStyle)
}
20
-
21
/**
 * Tells whether the node's inline `style` attribute contains
 * `font-weight: 700` (whitespace around the colon is tolerated).
 * font-weight:700 seems like the most important rule for bold in their html.
 *
 * @param el - Node to inspect; anything that is not an element yields `false`.
 * @returns `true` when the inline style declares a 700 font weight.
 */
function isStrong(el: Node): boolean {
  if (!isElement(el)) {
    return false
  }
  const inlineStyle = el.getAttribute('style') || ''
  return /font-weight\s*:\s*700/.test(inlineStyle)
}
26
-
27
/**
 * Tells whether the node's inline `style` attribute contains
 * `text-decoration: underline`.
 * text-decoration seems like the most important rule for underline in their
 * html.
 *
 * Elements whose direct parent is an `<a>` are never reported as underlined.
 *
 * @param el - Node to inspect; anything that is not an element yields `false`.
 * @returns `true` for an underlined element that is not a direct child of a link.
 */
function isUnderline(el: Node): boolean {
  if (isElement(el) && tagName(el.parentNode) !== 'a') {
    const inlineStyle = el.getAttribute('style') || ''
    return /text-decoration\s*:\s*underline/.test(inlineStyle)
  }

  return false
}
37
-
38
/**
 * Tells whether the node's inline `style` attribute has a `text-decoration`
 * declaration whose value includes `line-through`.
 * text-decoration seems like the most important rule for strike-through in
 * their html.
 *
 * The pattern is lenient: other text-decoration values may appear before or
 * after `line-through` (e.g. `text-decoration: underline line-through`).
 * Matching stops at the first `;`, so a `line-through` that belongs to a later,
 * unrelated declaration is not picked up, and the declaration does not need a
 * trailing `;` (it is optional on the last declaration of a style attribute).
 *
 * @param el - Node to inspect; anything that is not an element yields `false`.
 * @returns `true` when the inline style declares a line-through decoration.
 */
function isStrikethrough(el: Node): boolean {
  const style = isElement(el) && el.getAttribute('style')
  // `[^;]*` keeps the match inside the text-decoration declaration itself.
  return /text-decoration\s*:[^;]*line-through/.test(style || '')
}
44
-
45
/**
 * Check for the attribute given by the gdocs preprocessor.
 *
 * @param el - Node to inspect; anything that is not an element yields `false`.
 * @returns `true` when the element has a non-empty `data-is-google-docs`
 * attribute.
 */
function isGoogleDocs(el: Node): boolean {
  if (!isElement(el)) {
    return false
  }
  const marker = el.getAttribute('data-is-google-docs')
  return Boolean(marker)
}
49
-
50
/**
 * Tells whether the node is an element with a non-empty `data-is-root-node`
 * attribute.
 *
 * @param el - Node to inspect; anything that is not an element yields `false`.
 */
function isRootNode(el: Node): boolean {
  if (!isElement(el)) {
    return false
  }
  const marker = el.getAttribute('data-is-root-node')
  return Boolean(marker)
}
53
-
54
/**
 * Derives the list style of a list item from the tag of its parent node.
 *
 * @param el - The list item node.
 * @returns `undefined` when the parent has a tag name that is not one of
 * LIST_CONTAINER_TAGS, `'bullet'` when the parent is a `<ul>`, and `'number'`
 * in every other case (including when the parent yields no tag name).
 */
function getListItemStyle(el: Node): 'bullet' | 'number' | undefined {
  const containerTag = tagName(el.parentNode)
  const isForeignContainer =
    Boolean(containerTag) && !LIST_CONTAINER_TAGS.includes(containerTag || '')

  if (isForeignContainer) {
    return undefined
  }
  if (containerTag === 'ul') {
    return 'bullet'
  }
  return 'number'
}
61
-
62
/**
 * Computes the nesting level of a list item.
 *
 * For an `<li>` the level is the number of ancestors whose tag is one of
 * LIST_CONTAINER_TAGS; any other node gets level 1.
 *
 * @param el - The node to measure.
 * @returns The list nesting level.
 */
function getListItemLevel(el: Node): number {
  if (tagName(el) !== 'li') {
    return 1
  }

  let depth = 0
  // Walk up to the top of the tree, counting every list container on the way.
  for (
    let ancestor = el.parentNode;
    ancestor;
    ancestor = ancestor.parentNode
  ) {
    const ancestorTag = tagName(ancestor)
    if (ancestorTag && LIST_CONTAINER_TAGS.includes(ancestorTag)) {
      depth += 1
    }
  }
  return depth
}
78
-
79
// Lookup from tag name to block definition, merged from HTML_BLOCK_TAGS and
// HTML_HEADER_TAGS (header entries win on a key collision).
const blocks: Record<string, {style: string} | undefined> = Object.assign(
  {},
  HTML_BLOCK_TAGS,
  HTML_HEADER_TAGS,
)
83
-
84
/**
 * Picks the block style for a node based on the tag of its first child.
 *
 * @param schema - Schema whose `styles` list decides which styles are allowed.
 * @param el - Node whose first child is looked up in `blocks`.
 * @returns The mapped style when the first child's tag is known and the schema
 * has a style with that name; otherwise BLOCK_DEFAULT_STYLE.
 */
function getBlockStyle(schema: Schema, el: Node): string {
  const firstChildTag = tagName(el.firstChild)
  const mapped = firstChildTag ? blocks[firstChildTag] : undefined

  if (mapped && schema.styles.some(({name}) => name === mapped.style)) {
    return mapped.style
  }
  return BLOCK_DEFAULT_STYLE
}
95
-
96
/**
 * Creates the deserializer rules for HTML that the gdocs preprocessor has
 * tagged with `data-is-google-docs`.
 *
 * Three rules are returned, in this order:
 * 1. tagged `<span>` elements become spans with marks read from inline styles,
 * 2. tagged `<li>` elements become list item blocks,
 * 3. certain tagged `<br>` elements become empty spans.
 *
 * @param schema - Schema used to resolve the block style of list items.
 * @returns The Google Docs specific rules.
 */
export function createGDocsRules(schema: Schema): DeserializerRule[] {
  const spanRule: DeserializerRule = {
    deserialize(el, next) {
      if (!isElement(el) || tagName(el) !== 'span' || !isGoogleDocs(el)) {
        return undefined
      }

      const text = el.textContent
      if (!text) {
        // A text-less span without siblings is flagged before descending
        // into its children.
        const hasSiblings = Boolean(el.previousSibling || el.nextSibling)
        if (!hasSiblings) {
          el.setAttribute('data-lonely-child', 'true')
        }

        return next(el.childNodes)
      }

      const marks: string[] = []
      if (isStrong(el)) {
        marks.push('strong')
      }
      if (isUnderline(el)) {
        marks.push('underline')
      }
      if (isStrikethrough(el)) {
        marks.push('strike-through')
      }
      if (isEmphasis(el)) {
        marks.push('em')
      }

      return {
        ...DEFAULT_SPAN,
        marks,
        text,
      }
    },
  }

  const listItemRule: DeserializerRule = {
    deserialize(el, next) {
      if (tagName(el) !== 'li' || !isGoogleDocs(el)) {
        return undefined
      }

      const listItem = getListItemStyle(el)
      const level = getListItemLevel(el)
      const style = getBlockStyle(schema, el)
      // The children are taken from the list item's first child.
      const children = next(el.firstChild?.childNodes || [])

      return {
        ...DEFAULT_BLOCK,
        listItem,
        level,
        style,
        children,
      }
    },
  }

  const lineBreakRule: DeserializerRule = {
    deserialize(el) {
      if (tagName(el) !== 'br' || !isGoogleDocs(el) || !isElement(el)) {
        return undefined
      }

      const becomesEmptySpan =
        el.classList.contains('apple-interchange-newline') ||
        // BRs inside empty paragraphs
        el.parentNode?.textContent === '' ||
        // BRs on the root
        isRootNode(el)

      if (!becomesEmptySpan) {
        return undefined
      }

      return {
        ...DEFAULT_SPAN,
        text: '',
      }
    },
  }

  return [spanRule, listItemRule, lineBreakRule]
}
@@ -1,356 +0,0 @@
1
- import type {Schema} from '@portabletext/schema'
2
- import {
3
- DEFAULT_BLOCK,
4
- DEFAULT_SPAN,
5
- HTML_BLOCK_TAGS,
6
- HTML_DECORATOR_TAGS,
7
- HTML_HEADER_TAGS,
8
- HTML_LIST_CONTAINER_TAGS,
9
- HTML_LIST_ITEM_TAGS,
10
- HTML_SPAN_TAGS,
11
- type PartialBlock,
12
- } from '../../constants'
13
- import type {SchemaMatchers} from '../../schema-matchers'
14
- import type {DeserializerRule} from '../../types'
15
- import {keyGenerator} from '../../util/randomKey'
16
- import {isElement, tagName} from '../helpers'
17
- import {whitespaceTextNodeRule} from './rules.whitespace-text-node'
18
-
19
/**
 * Maps a list container tag to the list type it stands for, provided the
 * schema has a list with that name.
 *
 * @param schema - Schema whose `lists` are checked.
 * @param listNodeTagName - Tag name of the list container (`'ul'` or `'ol'`).
 * @returns `'bullet'` for `ul` and `'number'` for `ol` when the schema has a
 * list of that name; `undefined` for any other tag or a missing list.
 */
function resolveListItem(
  schema: Schema,
  listNodeTagName: string,
): string | undefined {
  let candidate: string | undefined
  if (listNodeTagName === 'ul') {
    candidate = 'bullet'
  } else if (listNodeTagName === 'ol') {
    candidate = 'number'
  }

  if (candidate === undefined) {
    return undefined
  }

  const isEnabled = schema.lists.some((list) => list.name === candidate)
  return isEnabled ? candidate : undefined
}
37
-
38
/**
 * Creates the generic HTML deserializer rules.
 *
 * The returned array starts with `whitespaceTextNodeRule`, followed by rules
 * for `<pre>`, `<blockquote>`, block/header tags, span tags, `<div>`, list
 * containers, `<br>`, list items, decorators, `<a>`, table cells and `<img>`.
 * Each rule returns `undefined` for nodes it does not handle.
 *
 * @param schema - Schema consulted for the enabled styles, lists, decorators
 * and annotations.
 * @param options - `keyGenerator` overrides the default key generator used for
 * link mark definitions and passed to the image matchers; `matchers` supplies
 * the optional `image` / `inlineImage` matchers.
 * @returns The HTML rules.
 */
export function createHTMLRules(
  schema: Schema,
  options: {keyGenerator?: () => string; matchers?: SchemaMatchers},
): DeserializerRule[] {
  return [
    whitespaceTextNodeRule,
    {
      // Pre element
      deserialize(el) {
        if (tagName(el) !== 'pre') {
          return undefined
        }

        const isCodeEnabled = schema.styles.some(
          (style) => style.name === 'code',
        )

        return {
          _type: 'block',
          style: 'normal',
          markDefs: [],
          children: [
            {
              ...DEFAULT_SPAN,
              marks: isCodeEnabled ? ['code'] : [],
              text: el.textContent || '',
            },
          ],
        }
      },
    }, // Blockquote element
    {
      deserialize(el, next) {
        if (tagName(el) !== 'blockquote') {
          return undefined
        }
        const blocks: Record<string, PartialBlock | undefined> = {
          ...HTML_BLOCK_TAGS,
          ...HTML_HEADER_TAGS,
        }
        delete blocks.blockquote
        const nonBlockquoteBlocks = Object.keys(blocks)

        const children: HTMLElement[] = []
        const lastIndex = el.childNodes.length - 1

        el.childNodes.forEach((node, index) => {
          if (!el.ownerDocument) {
            return
          }

          if (
            node.nodeType === 1 &&
            nonBlockquoteBlocks.includes(
              (node as Element).localName.toLowerCase(),
            )
          ) {
            // Nested block elements are unwrapped into a span so that the
            // blockquote stays a single block.
            const span = el.ownerDocument.createElement('span')

            const previousChild = children[children.length - 1]

            if (
              previousChild &&
              previousChild.nodeType === 3 &&
              previousChild.textContent?.trim()
            ) {
              // Only prepend line break if the previous node is a non-empty
              // text node.
              span.appendChild(el.ownerDocument.createTextNode('\r'))
            }

            node.childNodes.forEach((cn) => {
              span.appendChild(cn.cloneNode(true))
            })

            if (index !== lastIndex) {
              // Only append line break if this is not the last child
              span.appendChild(el.ownerDocument.createTextNode('\r'))
            }

            children.push(span)
          } else {
            children.push(node as HTMLElement)
          }
        })

        return {
          _type: 'block',
          style: 'blockquote',
          markDefs: [],
          children: next(children),
        }
      },
    }, // Block elements
    {
      deserialize(el, next) {
        const blocks: Record<string, PartialBlock | undefined> = {
          ...HTML_BLOCK_TAGS,
          ...HTML_HEADER_TAGS,
        }
        const tag = tagName(el)
        let block = tag ? blocks[tag] : undefined
        if (!block) {
          return undefined
        }
        // Don't add blocks into list items
        if (el.parentNode && tagName(el.parentNode) === 'li') {
          return next(el.childNodes)
        }
        const blockStyle = block.style
        // If style is not supported, return a defaultBlockType
        if (!schema.styles.some((style) => style.name === blockStyle)) {
          block = DEFAULT_BLOCK
        }
        return {
          ...block,
          children: next(el.childNodes),
        }
      },
    }, // Ignore span tags
    {
      deserialize(el, next) {
        const tag = tagName(el)
        if (!tag || !(tag in HTML_SPAN_TAGS)) {
          return undefined
        }
        return next(el.childNodes)
      },
    }, // Ignore div tags
    {
      deserialize(el, next) {
        const div = tagName(el) === 'div'
        if (!div) {
          return undefined
        }
        return next(el.childNodes)
      },
    }, // Ignore list containers
    {
      deserialize(el, next) {
        const tag = tagName(el)
        if (!tag || !(tag in HTML_LIST_CONTAINER_TAGS)) {
          return undefined
        }
        return next(el.childNodes)
      },
    }, // Deal with br's
    {
      deserialize(el) {
        if (tagName(el) === 'br') {
          return {
            ...DEFAULT_SPAN,
            text: '\n',
          }
        }
        return undefined
      },
    }, // Deal with list items
    {
      deserialize(el, next, block) {
        const tag = tagName(el)
        const listItem = tag ? HTML_LIST_ITEM_TAGS[tag] : undefined
        const parentTag = tagName(el.parentNode) || ''
        if (
          !listItem ||
          !el.parentNode ||
          !HTML_LIST_CONTAINER_TAGS[parentTag]
        ) {
          return undefined
        }
        const enabledListItem = resolveListItem(schema, parentTag)
        // If the list item style is not supported, return a new default block
        if (!enabledListItem) {
          return block({_type: 'block', children: next(el.childNodes)})
        }
        // Build a fresh object instead of writing to the shared
        // HTML_LIST_ITEM_TAGS entry, so the constant is never mutated.
        return {
          ...listItem,
          listItem: enabledListItem,
          children: next(el.childNodes),
        }
      },
    }, // Deal with decorators - this is a limited set of known html elements that we know how to deserialize
    {
      deserialize(el, next) {
        const decorator = HTML_DECORATOR_TAGS[tagName(el) || '']
        if (
          !decorator ||
          !schema.decorators.some(
            (decoratorType) => decoratorType.name === decorator,
          )
        ) {
          return undefined
        }
        return {
          _type: '__decorator',
          name: decorator,
          children: next(el.childNodes),
        }
      },
    }, // Special case for hyperlinks, add annotation (if allowed by schema),
    // If not supported just write out the link text and href in plain text.
    {
      deserialize(el, next) {
        if (tagName(el) !== 'a') {
          return undefined
        }
        const linkEnabled = schema.annotations.some(
          (annotation) => annotation.name === 'link',
        )
        const href = isElement(el) && el.getAttribute('href')
        if (!href) {
          return next(el.childNodes)
        }
        if (linkEnabled) {
          return {
            _type: '__annotation',
            markDef: {
              _key: options.keyGenerator
                ? options.keyGenerator()
                : keyGenerator(),
              _type: 'link',
              href: href,
            },
            children: next(el.childNodes),
          }
        }
        // Links are not enabled: append " (href)" to the anchor's own content
        // and deserialize that as plain children.
        return (
          el.appendChild(el.ownerDocument.createTextNode(` (${href})`)) &&
          next(el.childNodes)
        )
      },
    },
    {
      // Table cells become default blocks
      deserialize(el, next) {
        if (isElement(el) && (tagName(el) === 'td' || tagName(el) === 'th')) {
          return {
            ...DEFAULT_BLOCK,
            children: next(el.childNodes),
          }
        }

        return undefined
      },
    },
    {
      // Images, resolved through the optional `image` / `inlineImage` matchers
      deserialize(el) {
        if (isElement(el) && tagName(el) === 'img') {
          const src = el.getAttribute('src') ?? undefined
          const alt = el.getAttribute('alt') ?? undefined

          const props = Object.fromEntries(
            Array.from(el.attributes).map((attr) => [attr.name, attr.value]),
          )

          // Every matcher call receives its own, freshly built argument.
          const createMatcherInput = () => ({
            context: {
              schema,
              keyGenerator: options.keyGenerator ?? keyGenerator,
            },
            props: {
              ...props,
              ...(src ? {src} : {}),
              ...(alt ? {alt} : {}),
            },
          })

          const ancestorOfLonelyChild =
            el?.parentElement?.parentElement?.getAttribute('data-lonely-child')
          const ancestorOfListItem = el.closest('li') !== null

          // An image whose grandparent carries `data-lonely-child` and that is
          // not inside a list item is tried as a block image first.
          if (ancestorOfLonelyChild && !ancestorOfListItem) {
            const image = options.matchers?.image?.(createMatcherInput())

            if (image) {
              return {
                _type: '__block',
                block: image,
              }
            }
          }

          const inlineImage = options.matchers?.inlineImage?.(
            createMatcherInput(),
          )

          if (inlineImage) {
            return inlineImage
          }

          // Fall back to a block image when no inline image was produced.
          const image = options.matchers?.image?.(createMatcherInput())

          if (image) {
            return {
              _type: '__block',
              block: image,
            }
          }
        }

        return undefined
      },
    },
  ]
}
@@ -1,57 +0,0 @@
1
- import {DEFAULT_SPAN} from '../../constants'
2
- import type {DeserializerRule} from '../../types'
3
- import {isElement, tagName} from '../helpers'
4
-
5
/**
 * Tells whether the node's inline `style` attribute contains the exact text
 * `font-style:italic`.
 * font-style:italic seems like the most important rule for italic / emphasis
 * in their html.
 *
 * @param el - Node to inspect; anything that is not an element yields `false`.
 */
function isEmphasis(el: Node): boolean {
  if (!isElement(el)) {
    return false
  }
  const inlineStyle = el.getAttribute('style') || ''
  return /font-style:italic/.test(inlineStyle)
}
10
-
11
/**
 * Tells whether the node's inline `style` attribute contains
 * `font-weight:700` or `font-weight:600`.
 * font-weight:700 or 600 seems like the most important rule for bold in
 * their html.
 *
 * @param el - Node to inspect; anything that is not an element yields `false`.
 */
function isStrong(el: Node): boolean {
  if (!isElement(el)) {
    return false
  }
  const inlineStyle = el.getAttribute('style') || ''
  const boldPatterns = [/font-weight:700/, /font-weight:600/]
  return boldPatterns.some((pattern) => pattern.test(inlineStyle))
}
18
-
19
/**
 * Tells whether the node's inline `style` attribute contains the exact text
 * `text-decoration:underline`.
 * text-decoration seems like the most important rule for underline in their
 * html.
 *
 * @param el - Node to inspect; anything that is not an element yields `false`.
 */
function isUnderline(el: Node): boolean {
  if (!isElement(el)) {
    return false
  }
  const inlineStyle = el.getAttribute('style') || ''
  return /text-decoration:underline/.test(inlineStyle)
}
24
-
25
/**
 * Check for the attribute given by the Notion preprocessor.
 *
 * @param el - Node to inspect; anything that is not an element yields `false`.
 * @returns `true` when the element has a non-empty `data-is-notion` attribute.
 */
function isNotion(el: Node): boolean {
  if (!isElement(el)) {
    return false
  }
  const marker = el.getAttribute('data-is-notion')
  return Boolean(marker)
}
29
-
30
/**
 * Creates the deserializer rules for HTML tagged by the Notion preprocessor.
 *
 * Notion normally exports semantic HTML. However, if you copy a single block,
 * the formatting will be inline styles. The single rule returned here handles
 * a limited set of those styles (strong, underline, em) on tagged `<span>`
 * elements.
 *
 * @returns The Notion specific rules.
 */
export function createNotionRules(): DeserializerRule[] {
  const inlineStyleSpanRule: DeserializerRule = {
    deserialize(el) {
      if (!isElement(el) || tagName(el) !== 'span' || !isNotion(el)) {
        return undefined
      }

      const marks: string[] = []
      if (isStrong(el)) {
        marks.push('strong')
      }
      if (isUnderline(el)) {
        marks.push('underline')
      }
      if (isEmphasis(el)) {
        marks.push('em')
      }

      return {
        ...DEFAULT_SPAN,
        marks,
        text: el.textContent,
      }
    },
  }

  return [inlineStyleSpanRule]
}
@@ -1,31 +0,0 @@
1
- import {DEFAULT_SPAN} from '../../constants'
2
- import type {DeserializerRule} from '../../types'
3
- import {tagName} from '../helpers'
4
-
5
/**
 * Rule for `#text` nodes accepted by `isWhitespaceTextNode`: the node becomes
 * a span without marks whose text has every run of two or more whitespace
 * characters collapsed into a single space. Other nodes yield `undefined`.
 */
export const whitespaceTextNodeRule: DeserializerRule = {
  deserialize(node) {
    if (node.nodeName !== '#text' || !isWhitespaceTextNode(node)) {
      return undefined
    }

    const rawText = node.textContent ?? ''
    return {
      ...DEFAULT_SPAN,
      marks: [],
      text: rawText.replace(/\s\s+/g, ' '),
    }
  },
}
16
-
17
/**
 * Decides whether a text node is handled by `whitespaceTextNodeRule`.
 *
 * A node is rejected when its parent is `<body>`. Otherwise it is accepted
 * when either
 * - it is a text node whose content collapses to a single space and that sits
 *   between two siblings that are both present and not text nodes, or
 * - its text content is anything other than exactly one space.
 *
 * @param node - The node to check.
 */
function isWhitespaceTextNode(node: Node) {
  if (tagName(node.parentNode) === 'body') {
    return false
  }

  // Anything that is not exactly a single space is always accepted.
  if (node.textContent !== ' ') {
    return true
  }

  if (node.nodeType !== 3) {
    return false
  }

  const collapsed = (node.textContent || '')
    .replace(/[\r\n]/g, ' ')
    .replace(/\s\s+/g, ' ')
  if (collapsed !== ' ') {
    return false
  }

  const {nextSibling, previousSibling} = node
  if (!nextSibling || nextSibling.nodeType === 3) {
    return false
  }
  if (!previousSibling || previousSibling.nodeType === 3) {
    return false
  }
  return true
}