@portabletext/block-tools 4.1.7 → 4.1.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (34)
  1. package/package.json +6 -10
  2. package/src/HtmlDeserializer/flatten-nested-blocks.test.ts +0 -248
  3. package/src/HtmlDeserializer/flatten-nested-blocks.ts +0 -173
  4. package/src/HtmlDeserializer/helpers.ts +0 -108
  5. package/src/HtmlDeserializer/index.ts +0 -315
  6. package/src/HtmlDeserializer/preprocessors/index.ts +0 -15
  7. package/src/HtmlDeserializer/preprocessors/preprocessor.gdocs.ts +0 -66
  8. package/src/HtmlDeserializer/preprocessors/preprocessor.html.ts +0 -57
  9. package/src/HtmlDeserializer/preprocessors/preprocessor.notion.ts +0 -25
  10. package/src/HtmlDeserializer/preprocessors/preprocessor.whitespace.ts +0 -56
  11. package/src/HtmlDeserializer/preprocessors/preprocessor.word.ts +0 -92
  12. package/src/HtmlDeserializer/preprocessors/xpathResult.ts +0 -13
  13. package/src/HtmlDeserializer/rules/index.ts +0 -21
  14. package/src/HtmlDeserializer/rules/rules.gdocs.ts +0 -188
  15. package/src/HtmlDeserializer/rules/rules.html.ts +0 -356
  16. package/src/HtmlDeserializer/rules/rules.notion.ts +0 -57
  17. package/src/HtmlDeserializer/rules/rules.whitespace-text-node.ts +0 -31
  18. package/src/HtmlDeserializer/rules/rules.word.ts +0 -95
  19. package/src/HtmlDeserializer/trim-whitespace.ts +0 -157
  20. package/src/HtmlDeserializer/word-online/asserters.word-online.ts +0 -153
  21. package/src/HtmlDeserializer/word-online/preprocessor.word-online.ts +0 -263
  22. package/src/HtmlDeserializer/word-online/rules.word-online.ts +0 -390
  23. package/src/constants.ts +0 -104
  24. package/src/index.ts +0 -49
  25. package/src/rules/_exports/index.ts +0 -1
  26. package/src/rules/flatten-tables.test.ts +0 -495
  27. package/src/rules/flatten-tables.ts +0 -216
  28. package/src/rules/index.ts +0 -1
  29. package/src/schema-matchers.ts +0 -41
  30. package/src/types.ts +0 -100
  31. package/src/util/findBlockType.ts +0 -13
  32. package/src/util/normalizeBlock.ts +0 -171
  33. package/src/util/randomKey.ts +0 -28
  34. package/src/util/resolveJsType.ts +0 -44
@@ -1,95 +0,0 @@
1
- import {
2
- BLOCK_DEFAULT_STYLE,
3
- DEFAULT_BLOCK,
4
- HTML_HEADER_TAGS,
5
- } from '../../constants'
6
- import type {DeserializerRule} from '../../types'
7
- import {isElement, tagName} from '../helpers'
8
-
9
/**
 * Derives the list type of a Word list paragraph from its inline `style`
 * attribute.
 *
 * @param el - Node to inspect; anything that is not an element yields `undefined`.
 * @returns `'number'` when the style contains `lfo1`, `'bullet'` when it
 * contains any other `lfo<digits>` token, and `undefined` when there is no
 * style attribute or no `lfo` token at all.
 */
function getListItemStyle(el: Node): string | undefined {
  const inlineStyle = isElement(el) ? el.getAttribute('style') : null

  if (!inlineStyle || !/lfo\d+/.test(inlineStyle)) {
    return undefined
  }

  // Substring check: any token starting with `lfo1` counts as a numbered list
  return inlineStyle.includes('lfo1') ? 'number' : 'bullet'
}
21
-
22
/**
 * Reads the nesting level of a Word list paragraph from the `level<N>` token
 * in its inline `style` attribute.
 *
 * The whole run of digits after `level` is parsed, so multi-digit levels are
 * not truncated to their first digit.
 *
 * @param el - Node to inspect; anything that is not an element yields `undefined`.
 * @returns The parsed level, `1` when the parsed value is `0` or not a number,
 * or `undefined` when there is no style attribute or no `level<N>` token.
 */
function getListItemLevel(el: Node): number | undefined {
  const style = isElement(el) && el.getAttribute('style')
  if (!style) {
    return undefined
  }

  const levelMatch = /level(\d+)/.exec(style)
  if (!levelMatch) {
    return undefined
  }

  const levelNum = Number.parseInt(levelMatch[1] ?? '', 10)

  // A level of 0 (or an unparsable value) falls back to the first level
  return levelNum || 1
}
37
-
38
/**
 * Tells whether a node is a list paragraph as exported by Word.
 *
 * A node qualifies when it is an element and either its `className` is exactly
 * one of the `MsoListParagraphCxSp*` names, or its inline style carries an
 * `mso-list: l<N> level<N> lfo<N>` declaration.
 *
 * @param el - Node to inspect.
 * @returns `true` for Word list paragraphs, `false` otherwise.
 */
function isWordListElement(el: Node): boolean {
  if (!isElement(el)) {
    return false
  }

  // Exact className match only: an element with additional classes does not match here
  const listParagraphClassNames = [
    'MsoListParagraphCxSpFirst',
    'MsoListParagraphCxSpMiddle',
    'MsoListParagraphCxSpLast',
  ]

  if (listParagraphClassNames.includes(el.className)) {
    return true
  }

  const inlineStyle = el.getAttribute('style')

  return (
    inlineStyle !== null &&
    /mso-list:\s*l\d+\s+level\d+\s+lfo\d+/.test(inlineStyle)
  )
}
62
-
63
/**
 * Looks up the block style registered for a heading tag.
 *
 * @param el - Node whose tag name is looked up in `HTML_HEADER_TAGS`.
 * @returns The `style` of the matching `HTML_HEADER_TAGS` entry, or
 * `undefined` when the node has no tag name or the tag is not a known header.
 */
function getHeadingStyle(el: Node): string | undefined {
  const tag = tagName(el)
  const headerEntry = tag ? HTML_HEADER_TAGS[tag] : undefined

  return headerEntry ? headerEntry.style : undefined
}
70
-
71
/**
 * Builds the deserializer rules for HTML produced by Word.
 *
 * The single rule converts `<p>` and heading elements that are Word list
 * paragraphs into list-item blocks. Every other node is left to later rules by
 * returning `undefined`.
 *
 * @returns An array containing the Word list-item rule.
 */
export function createWordRules(): DeserializerRule[] {
  const wordListItemRule: DeserializerRule = {
    deserialize(el, next) {
      const tag = tagName(el)
      const isParagraphOrHeading = tag === 'p' || HTML_HEADER_TAGS[tag || '']

      if (!isParagraphOrHeading || !isWordListElement(el)) {
        return undefined
      }

      // Headings inside lists keep their heading style; paragraphs get the default
      const headingStyle = getHeadingStyle(el)

      return {
        ...DEFAULT_BLOCK,
        listItem: getListItemStyle(el),
        level: getListItemLevel(el),
        style: headingStyle || BLOCK_DEFAULT_STYLE,
        children: next(el.childNodes),
      }
    },
  }

  return [wordListItemRule]
}
@@ -1,157 +0,0 @@
1
- import {
2
- isSpan,
3
- isTextBlock,
4
- type PortableTextTextBlock,
5
- type Schema,
6
- } from '@portabletext/schema'
7
- import {isEqual} from 'lodash'
8
- import type {TypedObject, WhiteSpacePasteMode} from '../types'
9
- import {isMinimalSpan} from './helpers'
10
-
11
/**
 * Trims whitespace inside text blocks and filters empty text blocks according
 * to the paste mode.
 *
 * Text blocks are passed through `trimTextBlockWhitespace` in every mode;
 * non-text blocks are passed along untouched.
 *
 * - `'preserve'`: every block is kept.
 * - `'remove'`: empty text blocks are dropped.
 * - `'normalize'`: only the first block of each run of consecutive empty text
 *   blocks is kept.
 *
 * @param context - Schema context used to recognise text blocks and spans.
 * @param mode - Whitespace handling mode.
 * @param blocks - Blocks to process, in document order.
 * @returns A new array holding the blocks that were kept.
 */
export function trimWhitespace(
  context: {schema: Schema},
  mode: WhiteSpacePasteMode,
  blocks: TypedObject[],
): TypedObject[] {
  const keptBlocks: TypedObject[] = []
  let emptyRunLength = 0

  for (const block of blocks) {
    const candidate = isTextBlock(context, block)
      ? trimTextBlockWhitespace(block)
      : block

    switch (mode) {
      case 'preserve':
        keptBlocks.push(candidate)
        break

      case 'remove':
        if (!isEmptyTextBlock(context, candidate)) {
          keptBlocks.push(candidate)
        }
        break

      case 'normalize':
        if (isEmptyTextBlock(context, candidate)) {
          emptyRunLength++

          // Only the first empty block of a run survives
          if (emptyRunLength === 1) {
            keptBlocks.push(candidate)
          }
        } else {
          keptBlocks.push(candidate)
          emptyRunLength = 0
        }
        break

      default:
        break
    }
  }

  return keptBlocks
}
59
-
60
/**
 * Tells whether a block is a text block whose children are all spans
 * containing nothing but whitespace.
 *
 * @param context - Schema context used to recognise text blocks and spans.
 * @param block - Block to inspect.
 * @returns `false` for non-text blocks and for text blocks with any non-span
 * child or any span with visible text; `true` otherwise (including a text
 * block without children).
 */
function isEmptyTextBlock(
  context: {schema: Schema},
  block: TypedObject,
): boolean {
  return (
    isTextBlock(context, block) &&
    block.children.every(
      (child) => isSpan(context, child) && child.text.trim() === '',
    )
  )
}
78
-
79
/**
 * Collapses redundant horizontal whitespace in the spans of a text block.
 *
 * The block is mutated in place and returned. For every minimal span:
 * - leading whitespace is stripped from the first child and trailing
 *   whitespace from the last child (newlines are never stripped);
 * - when two neighbouring spans both have whitespace at their shared
 *   boundary, the whitespace on this span's side is removed;
 * - spans left without text are removed;
 * - a span consisting of a single space or non-breaking space is merged into
 *   a neighbouring span with equal marks (previous preferred, then next).
 *
 * Children are removed with `splice` while the array is being iterated, so the
 * child directly following a removed span is not visited by this loop.
 *
 * @param block - Text block whose children are trimmed in place.
 * @returns The same `block` instance.
 */
function trimTextBlockWhitespace(
  block: PortableTextTextBlock,
): PortableTextTextBlock {
  const leadingInlineWhitespace = /^[^\S\n]+/g
  const trailingInlineWhitespace = /[^\S\n]+$/g
  const startsWithWhitespace = (text: string) => /\s/.test(text.slice(0, 1))
  const endsWithWhitespace = (text: string) => /\s/.test(text.slice(-1))

  let position = 0

  for (const child of block.children) {
    if (isMinimalSpan(child)) {
      const following = nextSpan(block, position)
      const preceding = prevSpan(block, position)

      if (position === 0) {
        child.text = child.text.replace(leadingInlineWhitespace, '')
      }

      if (position === block.children.length - 1) {
        child.text = child.text.replace(trailingInlineWhitespace, '')
      }

      // Whitespace on both sides of the boundary with the next span: drop ours
      if (
        endsWithWhitespace(child.text) &&
        following &&
        isMinimalSpan(following) &&
        startsWithWhitespace(following.text)
      ) {
        child.text = child.text.replace(trailingInlineWhitespace, '')
      }

      // Whitespace on both sides of the boundary with the previous span: drop ours
      if (
        startsWithWhitespace(child.text) &&
        preceding &&
        isMinimalSpan(preceding) &&
        endsWithWhitespace(preceding.text)
      ) {
        child.text = child.text.replace(leadingInlineWhitespace, '')
      }

      if (!child.text) {
        block.children.splice(position, 1)
      }

      const isLoneSpace = isWhiteSpaceChar(child.text)

      if (preceding && isEqual(preceding.marks, child.marks) && isLoneSpace) {
        // Fold the lone space into the previous span
        preceding.text += ' '
        block.children.splice(position, 1)
      } else if (
        following &&
        isEqual(following.marks, child.marks) &&
        isLoneSpace
      ) {
        // Otherwise fold it into the next span
        following.text = ` ${following.text}`
        block.children.splice(position, 1)
      }
    }

    position++
  }

  return block
}
144
-
145
/**
 * Returns the child right after `index` when it is a span.
 *
 * @param block - Text block whose children are inspected.
 * @param index - Position of the current child.
 * @returns The following child if its `_type` is `'span'`, otherwise `null`.
 */
function nextSpan(block: PortableTextTextBlock, index: number) {
  const candidate = block.children[index + 1]

  if (!candidate || candidate._type !== 'span') {
    return null
  }

  return candidate
}
149
-
150
/**
 * Returns the child right before `index` when it is a span.
 *
 * @param block - Text block whose children are inspected.
 * @param index - Position of the current child.
 * @returns The preceding child if its `_type` is `'span'`, otherwise `null`.
 */
function prevSpan(block: PortableTextTextBlock, index: number) {
  const candidate = block.children[index - 1]

  if (!candidate || candidate._type !== 'span') {
    return null
  }

  return candidate
}
154
-
155
/**
 * Tells whether the text is exactly one regular space or one non-breaking
 * space.
 *
 * @param text - Text to test.
 * @returns `true` only for `' '` and `'\xa0'`.
 */
function isWhiteSpaceChar(text: string) {
  return text === '\xa0' || text === ' '
}
@@ -1,153 +0,0 @@
1
- import {isElement, tagName} from '../helpers'
2
-
3
/**
 * Detects Word Online markup by looking for its characteristic class
 * attributes in the raw HTML string.
 *
 * @param html - Raw HTML to inspect.
 * @returns `true` when a `class` attribute starts with `TextRun` or
 * `NormalTextRun` and later contains `SCXW<digits>` followed by
 * `BCX<digits>`, or starts with `EOP` and later contains `SCXW<digits>`.
 */
export function isWordOnlineHtml(html: string): boolean {
  const wordOnlineSignatures = [
    /class="(?:TextRun|NormalTextRun)[^"]*SCXW\d+[^"]*BCX\d+/,
    /class="EOP[^"]*SCXW\d+/,
  ]

  return wordOnlineSignatures.some((signature) => signature.test(html))
}
9
-
10
/**
 * Tells whether a node is a `<span>` carrying the `TextRun` class but not the
 * `EOP` class.
 *
 * @param el - Node to inspect.
 * @returns `true` for matching spans, `false` for everything else.
 */
export function isWordOnlineTextRun(el: Node): boolean {
  if (isElement(el) && tagName(el) === 'span') {
    const {classList} = el

    return classList.contains('TextRun') && !classList.contains('EOP')
  }

  return false
}
17
-
18
/**
 * Matches the inner, text-bearing span of Word Online's nested structure:
 * `<span class="TextRun"><span class="NormalTextRun">text</span></span>`.
 *
 * @param el - Node to inspect.
 * @returns `true` when the node is a `<span>` with the `NormalTextRun` class.
 */
export function isNormalTextRun(el: Node): boolean {
  return (
    isElement(el) &&
    tagName(el) === 'span' &&
    el.classList.contains('NormalTextRun')
  )
}
31
-
32
/**
 * Matches the outer container span of Word Online's nested structure:
 * `<span class="TextRun"><span class="NormalTextRun">text</span></span>`.
 *
 * Spans that additionally carry the `NormalTextRun` or `EOP` class are
 * excluded.
 *
 * @param el - Node to inspect.
 * @returns `true` when the node is a `<span>` with the `TextRun` class and
 * neither of the excluded classes.
 */
export function isTextRunSpan(el: Node): boolean {
  if (isElement(el) && tagName(el) === 'span') {
    const {classList} = el

    return (
      classList.contains('TextRun') &&
      !classList.contains('NormalTextRun') &&
      !classList.contains('EOP')
    )
  }

  return false
}
50
-
51
/**
 * Tells whether an element has the `OutlineElement` class and no text content
 * other than whitespace.
 *
 * @param el - Element to inspect.
 * @returns `true` for blank outline elements, `false` otherwise.
 */
export function isEmptyOutlineElement(el: Element): boolean {
  return (
    isElement(el) &&
    el.classList.contains('OutlineElement') &&
    el.textContent.trim() === ''
  )
}
62
-
63
/**
 * Tells whether a node is a `<span>` carrying the `FindHit` class.
 *
 * @param el - Node to inspect.
 * @returns `true` for matching spans, `false` for everything else.
 */
export function isFindHit(el: Node): boolean {
  return (
    isElement(el) &&
    tagName(el) === 'span' &&
    el.classList.contains('FindHit')
  )
}
70
-
71
/**
 * Tells whether a node is, or is nested inside, a `word-online-block` element
 * whose `data-parastyle` attribute is `heading <digit>`.
 *
 * @param el - Node at which the upward search starts (inclusive).
 * @returns `true` when such an ancestor-or-self exists, `false` otherwise.
 */
export function isInHeading(el: Node): boolean {
  for (let node: Node | null = el; node; node = node.parentNode) {
    if (
      isElement(node) &&
      tagName(node) === 'word-online-block' &&
      /^heading \d$/.test(node.getAttribute('data-parastyle') ?? '')
    ) {
      return true
    }
  }

  return false
}
89
-
90
/**
 * Tells whether a node is, or is nested inside, a `word-online-block` element
 * whose `data-parastyle` attribute is exactly `Quote`.
 *
 * @param el - Node at which the upward search starts (inclusive).
 * @returns `true` when such an ancestor-or-self exists, `false` otherwise.
 */
export function isInBlockquote(el: Node): boolean {
  for (let node: Node | null = el; node; node = node.parentNode) {
    if (
      isElement(node) &&
      tagName(node) === 'word-online-block' &&
      node.getAttribute('data-parastyle') === 'Quote'
    ) {
      return true
    }
  }

  return false
}
108
-
109
- /**********************
110
- * Formatting asserters
111
- **********************/
112
-
113
/**
 * Tells whether an element is marked bold, either through the `MacChromeBold`
 * class or a `font-weight: bold` declaration in its inline style.
 *
 * @param el - Element to inspect.
 * @returns `true` when either bold marker is present.
 */
export function hasStrongFormatting(el: Element): boolean {
  if (el.classList.contains('MacChromeBold')) {
    return true
  }

  const inlineStyle = el.getAttribute('style') ?? ''

  return /font-weight\s*:\s*bold/.test(inlineStyle)
}
121
-
122
/**
 * Tells whether an element's inline style contains a `font-style: italic`
 * declaration.
 *
 * @param el - Element to inspect.
 * @returns `true` when the declaration is present.
 */
export function hasEmphasisFormatting(el: Element): boolean {
  const inlineStyle = el.getAttribute('style')

  return /font-style\s*:\s*italic/.test(inlineStyle ?? '')
}
127
-
128
/**
 * Tells whether an element is underlined, either through the `Underlined`
 * class or a `text-decoration: underline` declaration in its inline style.
 *
 * @param el - Element to inspect.
 * @returns `true` when either underline marker is present.
 */
export function hasUnderlineFormatting(el: Element): boolean {
  if (el.classList.contains('Underlined')) {
    return true
  }

  const inlineStyle = el.getAttribute('style') ?? ''

  return /text-decoration\s*:\s*underline/.test(inlineStyle)
}
136
-
137
/**
 * Tells whether an element is struck through, either through the
 * `Strikethrough` class or a `text-decoration: line-through` declaration in
 * its inline style.
 *
 * @param el - Element to inspect.
 * @returns `true` when either strikethrough marker is present.
 */
export function hasStrikethroughFormatting(el: Element): boolean {
  if (el.classList.contains('Strikethrough')) {
    return true
  }

  const inlineStyle = el.getAttribute('style') ?? ''

  return /text-decoration\s*:\s*line-through/.test(inlineStyle)
}
145
-
146
/**
 * Tells whether an element carries any of the recognised inline formats:
 * strong, emphasis, underline or strikethrough.
 *
 * @param el - Element to inspect.
 * @returns `true` as soon as one of the format checks matches.
 */
export function hasFormatting(el: Element): boolean {
  const formattingChecks = [
    hasStrongFormatting,
    hasEmphasisFormatting,
    hasUnderlineFormatting,
    hasStrikethroughFormatting,
  ]

  return formattingChecks.some((hasFormat) => hasFormat(el))
}
@@ -1,263 +0,0 @@
1
- import {isElement, tagName} from '../helpers'
2
- import {
3
- hasFormatting,
4
- isNormalTextRun,
5
- isTextRunSpan,
6
- isWordOnlineHtml,
7
- } from './asserters.word-online'
8
-
9
/**
 * Rewrites a document parsed from Word Online HTML into a shape the
 * deserializer rules can work with. Documents that do not look like Word
 * Online markup are returned untouched.
 *
 * Three passes run in order, all mutating `doc` in place:
 * 1. `p.Paragraph[role="heading"]` elements with an `aria-level` are replaced
 *    by a `word-online-block` element tagged `heading <level>`.
 * 2. Consecutive top-level spans sharing a paragraph style are grouped into a
 *    `word-online-block` element tagged with that style.
 * 3. Whitespace-only spans nested inside `NormalTextRun` spans are removed and
 *    their whitespace is folded into neighbouring text.
 *
 * @param html - Raw HTML string, used only to detect Word Online markup.
 * @param doc - Parsed document to rewrite.
 * @returns The same `doc` instance.
 */
export function preprocessWordOnline(html: string, doc: Document): Document {
  if (!isWordOnlineHtml(html)) {
    return doc
  }

  wrapWordOnlineHeadingParagraphs(doc)
  groupWordOnlineSpansByParaStyle(doc)

  // Collected only after the two wrapping passes have finished moving nodes
  const textRunSpans = Array.from(doc.body.querySelectorAll('span')).filter(
    isTextRunSpan,
  )

  for (const textRunSpan of textRunSpans) {
    // Word Online can have multiple NormalTextRun children per TextRun
    const normalTextRuns = Array.from(textRunSpan.childNodes).filter(
      isNormalTextRun,
    )

    for (const normalTextRun of normalTextRuns) {
      foldWordOnlineWhitespaceSpans(doc, textRunSpan, normalTextRun)
    }
  }

  return doc
}

/** Replaces heading paragraphs with `word-online-block` wrappers holding their children. */
function wrapWordOnlineHeadingParagraphs(doc: Document): void {
  const headingParagraphs = Array.from(
    doc.querySelectorAll('p.Paragraph[role="heading"]'),
  )

  for (const paragraph of headingParagraphs) {
    const ariaLevel = paragraph.getAttribute('aria-level')
    const parent = paragraph.parentNode

    if (!ariaLevel || !parent) {
      continue
    }

    const wrapper = doc.createElement('word-online-block')
    wrapper.setAttribute('data-parastyle', `heading ${ariaLevel}`)
    parent.insertBefore(wrapper, paragraph)

    while (paragraph.firstChild) {
      wrapper.appendChild(paragraph.firstChild)
    }

    parent.removeChild(paragraph)
  }
}

/** Groups runs of consecutive top-level spans with the same paragraph style into one `word-online-block`. */
function groupWordOnlineSpansByParaStyle(doc: Document): void {
  // Walk the live sibling chain so the DOM mutations below are handled correctly
  let child = doc.body.firstChild

  while (child) {
    const next = child.nextSibling

    if (!isElement(child) || !tagName(child)?.includes('span')) {
      child = next
      continue
    }

    const paraStyle = getParaStyle(child)

    if (!paraStyle) {
      child = next
      continue
    }

    // Collect all consecutive siblings that share this paragraph style
    const group: Element[] = [child]
    let sibling = next

    while (sibling) {
      if (!isElement(sibling) || getParaStyle(sibling) !== paraStyle) {
        break
      }

      group.push(sibling)
      sibling = sibling.nextSibling
    }

    // A custom element name avoids conflicts with the regular HTML rules
    const wrapper = doc.createElement('word-online-block')
    wrapper.setAttribute('data-parastyle', paraStyle)
    doc.body.insertBefore(wrapper, child)

    for (const span of group) {
      wrapper.appendChild(span)
    }

    // Continue with the sibling after the last grouped span
    child = sibling
  }
}

/** Tells whether a node is a DOM text node (`nodeType` 3). */
function isWordOnlineTextNode(node: Node): boolean {
  return node.nodeType === 3
}

/**
 * Returns the NormalTextRun of the following TextRun when whitespace should
 * move there: the current run is formatted, the next sibling is an
 * unformatted TextRun, and it has a NormalTextRun element child.
 */
function findWordOnlineWhitespaceTarget(
  textRunSpan: Element,
): Element | undefined {
  const nextSibling = textRunSpan.nextSibling

  if (!nextSibling || !isElement(nextSibling) || !isTextRunSpan(nextSibling)) {
    return undefined
  }

  // "**bar** baz": the space belongs outside the formatting
  if (!hasFormatting(textRunSpan) || hasFormatting(nextSibling)) {
    return undefined
  }

  const candidate = Array.from(nextSibling.childNodes).find(isNormalTextRun)

  return candidate && isElement(candidate) ? candidate : undefined
}

/**
 * Removes every whitespace-only span nested in `normalTextRun` and folds its
 * whitespace (non-breaking spaces converted to regular spaces) into text.
 */
function foldWordOnlineWhitespaceSpans(
  doc: Document,
  textRunSpan: Element,
  normalTextRun: Node,
): void {
  // One nested span is removed per iteration, so the loop terminates once none are left
  for (;;) {
    const children = Array.from(normalTextRun.childNodes)
    const nestedSpanIndex = children.findIndex(
      (node) =>
        isElement(node) &&
        tagName(node) === 'span' &&
        node.textContent.trim() === '',
    )

    if (nestedSpanIndex === -1) {
      return
    }

    const nestedSpan = children[nestedSpanIndex]

    if (!nestedSpan) {
      return
    }

    // Word Online uses non-breaking spaces, convert to regular spaces
    const spaceText = nestedSpan.textContent?.replace(/\u00a0/g, ' ') ?? ''

    // Determine the position BEFORE removing the span: the closest text node
    // in front of it, if any, is where trailing whitespace belongs
    const textNodesBefore = children
      .slice(0, nestedSpanIndex)
      .filter(isWordOnlineTextNode)
    const precedingTextNode = textNodesBefore[textNodesBefore.length - 1]

    normalTextRun.removeChild(nestedSpan)

    if (!precedingTextNode) {
      // No text in front of the span: keep the space at the beginning
      const firstTextNode = Array.from(normalTextRun.childNodes).find(
        isWordOnlineTextNode,
      )

      if (firstTextNode) {
        firstTextNode.textContent =
          spaceText + (firstTextNode.textContent || '')
      } else {
        normalTextRun.insertBefore(
          doc.createTextNode(spaceText),
          normalTextRun.firstChild,
        )
      }

      continue
    }

    const whitespaceTarget = findWordOnlineWhitespaceTarget(textRunSpan)

    if (whitespaceTarget) {
      // Move the space to the start of the next, unformatted run
      const firstChild = whitespaceTarget.firstChild

      if (firstChild && firstChild.nodeType === 3) {
        firstChild.textContent = spaceText + (firstChild.textContent ?? '')
      } else {
        whitespaceTarget.insertBefore(
          doc.createTextNode(spaceText),
          whitespaceTarget.firstChild,
        )
      }
    } else {
      // Keep the space in this run, attached to the text it followed. This
      // also covers a next run without a NormalTextRun child, so the space is
      // never dropped.
      precedingTextNode.textContent =
        (precedingTextNode.textContent ?? '') + spaceText
    }
  }
}
230
-
231
/**
 * Resolves the Word Online paragraph style of an element.
 *
 * The element's own `data-ccp-parastyle` attribute wins. Otherwise, for a
 * `<span>` with the `TextRun` class, the style is taken from its
 * `.NormalTextRun` descendants, but only when all of them carry the same
 * non-empty value.
 *
 * @param element - Element to inspect.
 * @returns The paragraph style, or `undefined` when none can be determined.
 */
function getParaStyle(element: Element): string | undefined {
  const ownStyle = element.getAttribute('data-ccp-parastyle')

  if (ownStyle) {
    return ownStyle
  }

  if (tagName(element) !== 'span' || !element.classList.contains('TextRun')) {
    return undefined
  }

  const [firstRun, ...remainingRuns] = Array.from(
    element.querySelectorAll('.NormalTextRun'),
  )
  const sharedStyle = firstRun?.getAttribute('data-ccp-parastyle')

  if (!sharedStyle) {
    return undefined
  }

  // Every NormalTextRun must agree on the style, otherwise none is reported
  const allRunsAgree = remainingRuns.every(
    (run) => run.getAttribute('data-ccp-parastyle') === sharedStyle,
  )

  return allRunsAgree ? sharedStyle : undefined
}