@portabletext/block-tools 4.1.8 → 4.1.10
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/lib/_chunks-es/helpers.js +81 -9
- package/lib/_chunks-es/helpers.js.map +1 -1
- package/lib/index.js +14 -18
- package/lib/index.js.map +1 -1
- package/package.json +7 -10
- package/src/HtmlDeserializer/flatten-nested-blocks.test.ts +0 -248
- package/src/HtmlDeserializer/flatten-nested-blocks.ts +0 -173
- package/src/HtmlDeserializer/helpers.ts +0 -108
- package/src/HtmlDeserializer/index.ts +0 -315
- package/src/HtmlDeserializer/preprocessors/index.ts +0 -15
- package/src/HtmlDeserializer/preprocessors/preprocessor.gdocs.ts +0 -66
- package/src/HtmlDeserializer/preprocessors/preprocessor.html.ts +0 -57
- package/src/HtmlDeserializer/preprocessors/preprocessor.notion.ts +0 -25
- package/src/HtmlDeserializer/preprocessors/preprocessor.whitespace.ts +0 -56
- package/src/HtmlDeserializer/preprocessors/preprocessor.word.ts +0 -92
- package/src/HtmlDeserializer/preprocessors/xpathResult.ts +0 -13
- package/src/HtmlDeserializer/rules/index.ts +0 -21
- package/src/HtmlDeserializer/rules/rules.gdocs.ts +0 -188
- package/src/HtmlDeserializer/rules/rules.html.ts +0 -356
- package/src/HtmlDeserializer/rules/rules.notion.ts +0 -57
- package/src/HtmlDeserializer/rules/rules.whitespace-text-node.ts +0 -31
- package/src/HtmlDeserializer/rules/rules.word.ts +0 -95
- package/src/HtmlDeserializer/trim-whitespace.ts +0 -157
- package/src/HtmlDeserializer/word-online/asserters.word-online.ts +0 -153
- package/src/HtmlDeserializer/word-online/preprocessor.word-online.ts +0 -263
- package/src/HtmlDeserializer/word-online/rules.word-online.ts +0 -390
- package/src/constants.ts +0 -104
- package/src/index.ts +0 -49
- package/src/rules/_exports/index.ts +0 -1
- package/src/rules/flatten-tables.test.ts +0 -495
- package/src/rules/flatten-tables.ts +0 -216
- package/src/rules/index.ts +0 -1
- package/src/schema-matchers.ts +0 -41
- package/src/types.ts +0 -100
- package/src/util/findBlockType.ts +0 -13
- package/src/util/normalizeBlock.ts +0 -171
- package/src/util/randomKey.ts +0 -28
- package/src/util/resolveJsType.ts +0 -44
|
@@ -1,95 +0,0 @@
|
|
|
1
|
-
import {
|
|
2
|
-
BLOCK_DEFAULT_STYLE,
|
|
3
|
-
DEFAULT_BLOCK,
|
|
4
|
-
HTML_HEADER_TAGS,
|
|
5
|
-
} from '../../constants'
|
|
6
|
-
import type {DeserializerRule} from '../../types'
|
|
7
|
-
import {isElement, tagName} from '../helpers'
|
|
8
|
-
|
|
9
|
-
/**
 * Derives the list type from the inline `style` attribute of a Word list node.
 *
 * @param el - Node to inspect.
 * @returns `undefined` when `el` is not an element, has no (or an empty)
 * `style` attribute, or the style contains no `lfo<digits>` token. Otherwise
 * `'number'` when the style contains the text `lfo1`, and `'bullet'` for
 * anything else.
 */
function getListItemStyle(el: Node): string | undefined {
  if (!isElement(el)) {
    return undefined
  }

  const inlineStyle = el.getAttribute('style')

  if (!inlineStyle || !/lfo\d+/.test(inlineStyle)) {
    return undefined
  }

  // Plain substring check: a longer token that starts with `lfo1`
  // (for example `lfo10`) is also treated as a numbered list.
  return inlineStyle.includes('lfo1') ? 'number' : 'bullet'
}
|
|
21
|
-
|
|
22
|
-
/**
 * Reads the list nesting level from the inline `style` attribute of a Word
 * list node, using the number that follows the `level` token.
 *
 * @param el - Node to inspect.
 * @returns `undefined` when `el` is not an element, has no (or an empty)
 * `style` attribute, or the style contains no `level<digits>` token.
 * Otherwise the parsed level, with `0` or an unparsable value mapped to `1`.
 */
function getListItemLevel(el: Node): number | undefined {
  if (!isElement(el)) {
    return undefined
  }

  const style = el.getAttribute('style')
  if (!style) {
    return undefined
  }

  // Capture every digit after `level`, so a multi-digit level such as
  // `level12` is read as 12 instead of being cut down to its first digit.
  const levelMatch = /level(\d+)/.exec(style)
  if (!levelMatch) {
    return undefined
  }

  const levelNum = Number.parseInt(levelMatch[1] ?? '', 10)

  // `||` deliberately maps both NaN and 0 to the minimum level of 1.
  return levelNum || 1
}
|
|
37
|
-
|
|
38
|
-
/**
 * Tells whether a node is a list paragraph produced by Word.
 *
 * A node qualifies when it is an element and either its `className` is
 * exactly one of the `MsoListParagraphCxSp*` names, or its inline `style`
 * carries an `mso-list: l<n> level<n> lfo<n>` declaration.
 *
 * @param el - Node to inspect.
 * @returns `true` for a Word list element, `false` otherwise.
 */
function isWordListElement(el: Node): boolean {
  if (!isElement(el)) {
    return false
  }

  // The class attribute must match exactly; additional classes disqualify it.
  const listParagraphClassNames = [
    'MsoListParagraphCxSpFirst',
    'MsoListParagraphCxSpMiddle',
    'MsoListParagraphCxSpLast',
  ]

  if (listParagraphClassNames.includes(el.className)) {
    return true
  }

  const inlineStyle = el.getAttribute('style')

  return (
    inlineStyle !== null &&
    /mso-list:\s*l\d+\s+level\d+\s+lfo\d+/.test(inlineStyle)
  )
}
|
|
62
|
-
|
|
63
|
-
/**
 * Looks up the block style registered in `HTML_HEADER_TAGS` for a node's tag.
 *
 * @param el - Node to inspect.
 * @returns The `style` of the matching `HTML_HEADER_TAGS` entry, or
 * `undefined` when the node has no tag name or the tag has no entry.
 */
function getHeadingStyle(el: Node): string | undefined {
  const tag = tagName(el)

  if (!tag) {
    return undefined
  }

  const headerEntry = HTML_HEADER_TAGS[tag]

  return headerEntry ? headerEntry.style : undefined
}
|
|
70
|
-
|
|
71
|
-
/**
 * Builds the deserializer rules for HTML pasted from Word.
 *
 * The single rule turns a `<p>` or heading element that is a Word list
 * element into a list-item block. List type and level come from the
 * element's inline style; the block style is the heading style when the tag
 * is a heading, otherwise `BLOCK_DEFAULT_STYLE`. Any other node yields
 * `undefined`.
 *
 * @returns An array containing the Word list-item rule.
 */
export function createWordRules(): DeserializerRule[] {
  const wordListItemRule: DeserializerRule = {
    deserialize(el, next) {
      const tag = tagName(el)
      const isParagraphOrHeading =
        tag === 'p' || Boolean(HTML_HEADER_TAGS[tag || ''])

      // Only paragraphs and headings that Word marked as list items match.
      if (!isParagraphOrHeading || !isWordListElement(el)) {
        return undefined
      }

      return {
        ...DEFAULT_BLOCK,
        listItem: getListItemStyle(el),
        level: getListItemLevel(el),
        style: getHeadingStyle(el) || BLOCK_DEFAULT_STYLE,
        children: next(el.childNodes),
      }
    },
  }

  return [wordListItemRule]
}
|
|
@@ -1,157 +0,0 @@
|
|
|
1
|
-
import {
|
|
2
|
-
isSpan,
|
|
3
|
-
isTextBlock,
|
|
4
|
-
type PortableTextTextBlock,
|
|
5
|
-
type Schema,
|
|
6
|
-
} from '@portabletext/schema'
|
|
7
|
-
import {isEqual} from 'lodash'
|
|
8
|
-
import type {TypedObject, WhiteSpacePasteMode} from '../types'
|
|
9
|
-
import {isMinimalSpan} from './helpers'
|
|
10
|
-
|
|
11
|
-
/**
 * Trims whitespace inside text blocks and filters empty text blocks
 * according to the paste mode.
 *
 * Every text block is first passed through `trimTextBlockWhitespace`;
 * other blocks are left untouched. Then, per mode:
 * - `'preserve'`: every block is kept.
 * - `'remove'`: empty text blocks are dropped.
 * - `'normalize'`: only the first block of each run of consecutive empty
 *   text blocks is kept.
 *
 * @param context - Holds the schema used to recognise text blocks and spans.
 * @param mode - Whitespace paste mode.
 * @param blocks - Blocks to process, in document order.
 * @returns A new array with the retained blocks.
 */
export function trimWhitespace(
  context: {schema: Schema},
  mode: WhiteSpacePasteMode,
  blocks: TypedObject[],
): TypedObject[] {
  const result: TypedObject[] = []
  // Length of the current run of empty text blocks (used by 'normalize' only).
  let emptyRunLength = 0

  for (const block of blocks) {
    const candidate = isTextBlock(context, block)
      ? trimTextBlockWhitespace(block)
      : block

    switch (mode) {
      case 'preserve':
        result.push(candidate)
        break

      case 'remove':
        if (!isEmptyTextBlock(context, candidate)) {
          result.push(candidate)
        }
        break

      case 'normalize':
        if (!isEmptyTextBlock(context, candidate)) {
          result.push(candidate)
          emptyRunLength = 0
        } else {
          emptyRunLength++

          // Keep only the first empty block of a run.
          if (emptyRunLength === 1) {
            result.push(candidate)
          }
        }
        break
    }
  }

  return result
}
|
|
59
|
-
|
|
60
|
-
/**
 * Tells whether a block is a text block with no visible content.
 *
 * @param context - Holds the schema used to recognise text blocks and spans.
 * @param block - Block to inspect.
 * @returns `true` when `block` is a text block and every child is a span
 * whose text is empty after trimming (a block with no children counts as
 * empty); `false` for non-text blocks or when any child is a non-span or
 * has non-whitespace text.
 */
function isEmptyTextBlock(
  context: {schema: Schema},
  block: TypedObject,
): boolean {
  return (
    isTextBlock(context, block) &&
    block.children.every(
      (child) => isSpan(context, child) && child.text.trim() === '',
    )
  )
}
|
|
78
|
-
|
|
79
|
-
/**
 * Trims redundant whitespace from the spans of a text block, in place.
 *
 * For every child that `isMinimalSpan` accepts, in order:
 * 1. leading non-newline whitespace is stripped from the first child and
 *    trailing non-newline whitespace from the last child;
 * 2. where this span and a neighbouring span both have whitespace at their
 *    shared boundary, this span's side is stripped;
 * 3. a span left with empty text is removed;
 * 4. a span whose text is exactly one space or one non-breaking space is
 *    folded into the previous span (or else the next one) when their marks
 *    are equal, and is then removed.
 *
 * The children array is spliced while it is being iterated, so the child
 * directly after a removed one is not visited.
 *
 * @param block - Text block to trim; both it and its spans are mutated.
 * @returns The same `block` instance.
 */
function trimTextBlockWhitespace(
  block: PortableTextTextBlock,
): PortableTextTextBlock {
  const stripLeading = (text: string) => text.replace(/^[^\S\n]+/g, '')
  const stripTrailing = (text: string) => text.replace(/[^\S\n]+$/g, '')
  const startsWithWhitespace = (text: string) => /\s/.test(text.slice(0, 1))
  const endsWithWhitespace = (text: string) => /\s/.test(text.slice(-1))

  // `entries()` is a live iterator, so `position` always equals the index the
  // iterator is currently reading, even after earlier splices.
  for (const [position, child] of block.children.entries()) {
    if (!isMinimalSpan(child)) {
      continue
    }

    // Neighbours are resolved before any mutation of this child.
    const following = nextSpan(block, position)
    const preceding = prevSpan(block, position)

    if (position === 0) {
      child.text = stripLeading(child.text)
    }

    if (position === block.children.length - 1) {
      child.text = stripTrailing(child.text)
    }

    // Whitespace on both sides of the boundary with the next span.
    if (
      endsWithWhitespace(child.text) &&
      following &&
      isMinimalSpan(following) &&
      startsWithWhitespace(following.text)
    ) {
      child.text = stripTrailing(child.text)
    }

    // Whitespace on both sides of the boundary with the previous span.
    if (
      startsWithWhitespace(child.text) &&
      preceding &&
      isMinimalSpan(preceding) &&
      endsWithWhitespace(preceding.text)
    ) {
      child.text = stripLeading(child.text)
    }

    if (!child.text) {
      block.children.splice(position, 1)
    }

    // A span that is a single (non-breaking) space merges into a neighbour
    // carrying the same marks; the previous neighbour takes precedence.
    if (isWhiteSpaceChar(child.text)) {
      if (preceding && isEqual(preceding.marks, child.marks)) {
        preceding.text += ' '
        block.children.splice(position, 1)
      } else if (following && isEqual(following.marks, child.marks)) {
        following.text = ` ${following.text}`
        block.children.splice(position, 1)
      }
    }
  }

  return block
}
|
|
144
|
-
|
|
145
|
-
/**
 * Returns the child right after `index` when its `_type` is `'span'`.
 *
 * @param block - Text block whose children are inspected.
 * @param index - Position of the current child.
 * @returns The following child, or `null` when there is none or it is not
 * a span.
 */
function nextSpan(block: PortableTextTextBlock, index: number) {
  const candidate = block.children[index + 1]

  if (!candidate || candidate._type !== 'span') {
    return null
  }

  return candidate
}
|
|
149
|
-
|
|
150
|
-
/**
 * Returns the child right before `index` when its `_type` is `'span'`.
 *
 * @param block - Text block whose children are inspected.
 * @param index - Position of the current child.
 * @returns The preceding child, or `null` when there is none or it is not
 * a span.
 */
function prevSpan(block: PortableTextTextBlock, index: number) {
  const candidate = block.children[index - 1]

  if (!candidate || candidate._type !== 'span') {
    return null
  }

  return candidate
}
|
|
154
|
-
|
|
155
|
-
/**
 * Tells whether `text` is exactly one regular space or exactly one
 * non-breaking space (U+00A0). Longer strings, the empty string and other
 * whitespace characters all yield `false`.
 */
function isWhiteSpaceChar(text: string) {
  return text === '\xa0' || text === ' '
}
|
|
@@ -1,153 +0,0 @@
|
|
|
1
|
-
import {isElement, tagName} from '../helpers'
|
|
2
|
-
|
|
3
|
-
/**
 * Detects HTML copied from Word Online by looking for its class-name
 * signature in the raw markup.
 *
 * @param html - Raw HTML string.
 * @returns `true` when a `class` attribute starts with `TextRun` or
 * `NormalTextRun` and later contains `SCXW<digits>` followed by
 * `BCX<digits>`, or when one starts with `EOP` and later contains
 * `SCXW<digits>`.
 */
export function isWordOnlineHtml(html: string): boolean {
  const textRunSignature =
    /class="(?:TextRun|NormalTextRun)[^"]*SCXW\d+[^"]*BCX\d+/
  const endOfParagraphSignature = /class="EOP[^"]*SCXW\d+/

  if (textRunSignature.test(html)) {
    return true
  }

  return endOfParagraphSignature.test(html)
}
|
|
9
|
-
|
|
10
|
-
/**
 * Tells whether a node is a `<span>` element that has the `TextRun` class
 * and does not have the `EOP` class.
 */
export function isWordOnlineTextRun(el: Node): boolean {
  return (
    isElement(el) &&
    tagName(el) === 'span' &&
    el.classList.contains('TextRun') &&
    !el.classList.contains('EOP')
  )
}
|
|
17
|
-
|
|
18
|
-
/**
 * Matches the inner, text-bearing span of Word Online's nested markup:
 * `<span class="TextRun"><span class="NormalTextRun">text</span></span>`.
 *
 * @param el - Node to inspect.
 * @returns `true` when `el` is a `<span>` element with the `NormalTextRun`
 * class.
 */
export function isNormalTextRun(el: Node): boolean {
  return (
    isElement(el) &&
    tagName(el) === 'span' &&
    el.classList.contains('NormalTextRun')
  )
}
|
|
31
|
-
|
|
32
|
-
/**
 * Matches the outer container span of Word Online's nested markup:
 * `<span class="TextRun"><span class="NormalTextRun">text</span></span>`.
 * The outer span holds the formatting; the text lives in its
 * `NormalTextRun` children.
 *
 * @param el - Node to inspect.
 * @returns `true` when `el` is a `<span>` element that has the `TextRun`
 * class and has neither the `NormalTextRun` nor the `EOP` class.
 */
export function isTextRunSpan(el: Node): boolean {
  if (!isElement(el) || tagName(el) !== 'span') {
    return false
  }

  const {classList} = el

  if (!classList.contains('TextRun')) {
    return false
  }

  return !classList.contains('NormalTextRun') && !classList.contains('EOP')
}
|
|
50
|
-
|
|
51
|
-
/**
 * Tells whether an element has the `OutlineElement` class and text content
 * that is empty after trimming.
 */
export function isEmptyOutlineElement(el: Element): boolean {
  return (
    isElement(el) &&
    el.classList.contains('OutlineElement') &&
    el.textContent.trim() === ''
  )
}
|
|
62
|
-
|
|
63
|
-
/**
 * Tells whether a node is a `<span>` element with the `FindHit` class.
 */
export function isFindHit(el: Node): boolean {
  return (
    isElement(el) && tagName(el) === 'span' && el.classList.contains('FindHit')
  )
}
|
|
70
|
-
|
|
71
|
-
/**
 * Tells whether a node is, or sits inside, a `word-online-block` element
 * whose `data-parastyle` attribute is `heading <digit>`.
 *
 * @param el - Node where the upward search starts (the node itself is
 * checked too).
 * @returns `true` when such an element is found on the path to the root.
 */
export function isInHeading(el: Node): boolean {
  for (let node: Node | null = el; node; node = node.parentNode) {
    if (!isElement(node) || tagName(node) !== 'word-online-block') {
      continue
    }

    if (/^heading \d$/.test(node.getAttribute('data-parastyle') ?? '')) {
      return true
    }
  }

  return false
}
|
|
89
|
-
|
|
90
|
-
/**
 * Tells whether a node is, or sits inside, a `word-online-block` element
 * whose `data-parastyle` attribute is exactly `Quote`.
 *
 * @param el - Node where the upward search starts (the node itself is
 * checked too).
 * @returns `true` when such an element is found on the path to the root.
 */
export function isInBlockquote(el: Node): boolean {
  for (let node: Node | null = el; node; node = node.parentNode) {
    if (!isElement(node) || tagName(node) !== 'word-online-block') {
      continue
    }

    if (node.getAttribute('data-parastyle') === 'Quote') {
      return true
    }
  }

  return false
}
|
|
108
|
-
|
|
109
|
-
/**********************
|
|
110
|
-
* Formatting asserters
|
|
111
|
-
**********************/
|
|
112
|
-
|
|
113
|
-
/**
 * Tells whether an element is bold: it has the `MacChromeBold` class or its
 * inline style contains a `font-weight: bold` declaration.
 */
export function hasStrongFormatting(el: Element): boolean {
  const inlineStyle = el.getAttribute('style') ?? ''

  if (el.classList.contains('MacChromeBold')) {
    return true
  }

  return /font-weight\s*:\s*bold/.test(inlineStyle)
}
|
|
121
|
-
|
|
122
|
-
/**
 * Tells whether an element's inline style contains a `font-style: italic`
 * declaration.
 */
export function hasEmphasisFormatting(el: Element): boolean {
  const inlineStyle = el.getAttribute('style')

  // A missing or empty style attribute can never match.
  if (!inlineStyle) {
    return false
  }

  return /font-style\s*:\s*italic/.test(inlineStyle)
}
|
|
127
|
-
|
|
128
|
-
/**
 * Tells whether an element is underlined: it has the `Underlined` class or
 * its inline style contains a `text-decoration: underline` declaration.
 */
export function hasUnderlineFormatting(el: Element): boolean {
  const inlineStyle = el.getAttribute('style') ?? ''

  if (el.classList.contains('Underlined')) {
    return true
  }

  return /text-decoration\s*:\s*underline/.test(inlineStyle)
}
|
|
136
|
-
|
|
137
|
-
/**
 * Tells whether an element is struck through: it has the `Strikethrough`
 * class or its inline style contains a `text-decoration: line-through`
 * declaration.
 */
export function hasStrikethroughFormatting(el: Element): boolean {
  const inlineStyle = el.getAttribute('style') ?? ''

  if (el.classList.contains('Strikethrough')) {
    return true
  }

  return /text-decoration\s*:\s*line-through/.test(inlineStyle)
}
|
|
145
|
-
|
|
146
|
-
/**
 * Tells whether an element carries any of the recognised inline formats:
 * strong, emphasis, underline or strikethrough. The checks run in that
 * order and stop at the first one that matches.
 */
export function hasFormatting(el: Element): boolean {
  const formattingChecks = [
    hasStrongFormatting,
    hasEmphasisFormatting,
    hasUnderlineFormatting,
    hasStrikethroughFormatting,
  ]

  return formattingChecks.some((hasFormat) => hasFormat(el))
}
|
|
@@ -1,263 +0,0 @@
|
|
|
1
|
-
import {isElement, tagName} from '../helpers'
|
|
2
|
-
import {
|
|
3
|
-
hasFormatting,
|
|
4
|
-
isNormalTextRun,
|
|
5
|
-
isTextRunSpan,
|
|
6
|
-
isWordOnlineHtml,
|
|
7
|
-
} from './asserters.word-online'
|
|
8
|
-
|
|
9
|
-
/**
 * Rewrites a document parsed from Word Online clipboard HTML, in place, in
 * three passes:
 *
 * 1. heading paragraphs (`p.Paragraph[role="heading"]` with an `aria-level`)
 *    are replaced by `word-online-block` wrappers;
 * 2. runs of top-level spans that share a paragraph style are grouped into
 *    `word-online-block` wrappers;
 * 3. whitespace-only spans nested inside `NormalTextRun` spans are removed
 *    and their whitespace is merged into neighbouring text nodes.
 *
 * Documents whose `html` is not recognised by `isWordOnlineHtml` are
 * returned untouched.
 *
 * @param html - Raw HTML the document was parsed from; used for detection only.
 * @param doc - Parsed document; mutated in place.
 * @returns The same `doc` instance.
 */
export function preprocessWordOnline(html: string, doc: Document): Document {
  if (!isWordOnlineHtml(html)) {
    return doc
  }

  wrapHeadingParagraphs(doc)
  groupSpansByParaStyle(doc)
  foldWhitespaceSpans(doc)

  return doc
}

/**
 * Replaces every `p.Paragraph[role="heading"]` that has an `aria-level` and a
 * parent with a `word-online-block` element holding the paragraph's children
 * and `data-parastyle="heading <aria-level>"`.
 */
function wrapHeadingParagraphs(doc: Document): void {
  const paragraphs = Array.from(
    doc.querySelectorAll('p.Paragraph[role="heading"]'),
  )

  for (const paragraph of paragraphs) {
    const ariaLevel = paragraph.getAttribute('aria-level')
    const parent = paragraph.parentNode

    if (!ariaLevel || !parent) {
      continue
    }

    const wrapper = doc.createElement('word-online-block')
    wrapper.setAttribute('data-parastyle', `heading ${ariaLevel}`)
    parent.insertBefore(wrapper, paragraph)

    while (paragraph.firstChild) {
      wrapper.appendChild(paragraph.firstChild)
    }

    parent.removeChild(paragraph)
  }
}

/**
 * Groups consecutive children of `doc.body` that share a paragraph style
 * (as reported by `getParaStyle`) into one `word-online-block` wrapper per
 * run, so that several spans can form a single block (e.g. a blockquote or
 * heading). A run starts at an element whose tag name includes `span`.
 */
function groupSpansByParaStyle(doc: Document): void {
  let child = doc.body.firstChild

  while (child) {
    // Read the sibling up front: `child` may be moved into a wrapper below.
    const next = child.nextSibling

    if (!isElement(child) || !tagName(child)?.includes('span')) {
      child = next
      continue
    }

    const paraStyle = getParaStyle(child)

    if (!paraStyle) {
      child = next
      continue
    }

    // Collect the following siblings that carry the same paragraph style.
    const group: Element[] = [child]
    let sibling = next

    while (sibling) {
      if (!isElement(sibling) || getParaStyle(sibling) !== paraStyle) {
        break
      }

      group.push(sibling)
      sibling = sibling.nextSibling
    }

    // A custom element name avoids clashing with the regular HTML rules.
    const wrapper = doc.createElement('word-online-block')
    wrapper.setAttribute('data-parastyle', paraStyle)
    doc.body.insertBefore(wrapper, child)

    for (const span of group) {
      wrapper.appendChild(span)
    }

    // Resume with the first sibling that did not join the group.
    child = sibling
  }
}

/**
 * Removes whitespace-only `<span>` children from every `NormalTextRun`
 * inside every TextRun span and re-attaches their whitespace (with
 * non-breaking spaces converted to regular spaces) to a text node:
 *
 * - no text node before the span: prepended to the run's first text node;
 * - the TextRun has formatting and the next sibling TextRun has none: moved
 *   to the start of the next TextRun's first `NormalTextRun`, so the space
 *   ends up outside the formatted text;
 * - otherwise: appended to the nearest text node preceding the span.
 */
function foldWhitespaceSpans(doc: Document): void {
  const textRunSpans = Array.from(doc.body.querySelectorAll('span')).filter(
    isTextRunSpan,
  )

  for (const textRunSpan of textRunSpans) {
    // Word Online can put several NormalTextRun spans in one TextRun.
    const normalTextRuns = Array.from(textRunSpan.childNodes).filter(
      isNormalTextRun,
    )

    for (const normalTextRun of normalTextRuns) {
      // Each pass removes one span, so the children are re-read every time.
      for (;;) {
        const children = Array.from(normalTextRun.childNodes)
        const nestedSpanIndex = children.findIndex(
          (node) =>
            isElement(node) &&
            tagName(node) === 'span' &&
            node.textContent.trim() === '',
        )

        if (nestedSpanIndex === -1) {
          break
        }

        const nestedSpan = children[nestedSpanIndex]

        if (!nestedSpan) {
          break
        }

        // Word Online uses non-breaking spaces; convert to regular spaces.
        const spaceText = nestedSpan.textContent?.replace(/\u00a0/g, ' ') ?? ''

        // Locate the neighbouring text BEFORE the span is removed.
        const precedingTextNode = findLastTextNode(
          children.slice(0, nestedSpanIndex),
        )

        normalTextRun.removeChild(nestedSpan)

        if (!precedingTextNode) {
          // Leading whitespace: keep it at the beginning of the run.
          const firstTextNode = Array.from(normalTextRun.childNodes).find(
            (n) => n.nodeType === 3,
          )

          if (firstTextNode) {
            firstTextNode.textContent =
              spaceText + (firstTextNode.textContent || '')
          } else {
            normalTextRun.insertBefore(
              doc.createTextNode(spaceText),
              normalTextRun.firstChild,
            )
          }

          continue
        }

        const nextSibling = textRunSpan.nextSibling

        if (
          nextSibling &&
          isElement(nextSibling) &&
          isTextRunSpan(nextSibling) &&
          hasFormatting(textRunSpan) &&
          !hasFormatting(nextSibling)
        ) {
          // "**bar** baz": the space belongs outside the formatted run.
          const nextNormalTextRun = Array.from(nextSibling.childNodes).find(
            isNormalTextRun,
          )

          if (nextNormalTextRun && isElement(nextNormalTextRun)) {
            const firstChild = nextNormalTextRun.firstChild

            if (firstChild && firstChild.nodeType === 3) {
              firstChild.textContent =
                spaceText + (firstChild.textContent ?? '')
            } else {
              nextNormalTextRun.insertBefore(
                doc.createTextNode(spaceText),
                nextNormalTextRun.firstChild,
              )
            }
          }

          // NOTE(review): when the next TextRun has no NormalTextRun child the
          // whitespace is discarded; this is unchanged from the previous
          // behaviour. Confirm whether it should stay in the current run.
          continue
        }

        // Same formatting, or no following TextRun: keep the whitespace in
        // this run, directly after the text that preceded the removed span.
        precedingTextNode.textContent =
          (precedingTextNode.textContent ?? '') + spaceText
      }
    }
  }
}

/** Returns the last text node (`nodeType === 3`) in `nodes`, if any. */
function findLastTextNode(nodes: Node[]): Node | undefined {
  for (let i = nodes.length - 1; i >= 0; i--) {
    const node = nodes[i]

    if (node && node.nodeType === 3) {
      return node
    }
  }

  return undefined
}
|
|
230
|
-
|
|
231
|
-
/**
 * Resolves the paragraph style of an element from `data-ccp-parastyle`.
 *
 * The element's own attribute wins. Failing that, a `<span>` with the
 * `TextRun` class takes the style of its `.NormalTextRun` descendants,
 * provided the first one has a non-empty style and all of them agree on it.
 *
 * @param element - Element to inspect.
 * @returns The paragraph style, or `undefined` when none can be resolved.
 */
function getParaStyle(element: Element): string | undefined {
  const ownStyle = element.getAttribute('data-ccp-parastyle')

  if (ownStyle) {
    return ownStyle
  }

  const isTextRun =
    tagName(element) === 'span' && element.classList.contains('TextRun')

  if (!isTextRun) {
    return undefined
  }

  const [firstRun, ...otherRuns] = Array.from(
    element.querySelectorAll('.NormalTextRun'),
  )
  const sharedStyle = firstRun?.getAttribute('data-ccp-parastyle')

  if (!sharedStyle) {
    return undefined
  }

  // Every remaining NormalTextRun has to carry the very same style.
  const allAgree = otherRuns.every(
    (run) => run.getAttribute('data-ccp-parastyle') === sharedStyle,
  )

  return allAgree ? sharedStyle : undefined
}
|