@ckeditor/ckeditor5-paste-from-office 41.3.0 → 41.4.0-alpha.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index-content.css +4 -0
- package/dist/index-editor.css +4 -0
- package/dist/index.css +4 -0
- package/dist/index.js +1122 -0
- package/dist/index.js.map +1 -0
- package/dist/types/augmentation.d.ts +14 -0
- package/dist/types/filters/br.d.ts +18 -0
- package/dist/types/filters/image.d.ts +28 -0
- package/dist/types/filters/list.d.ts +30 -0
- package/dist/types/filters/parse.d.ts +39 -0
- package/dist/types/filters/removeboldwrapper.d.ts +18 -0
- package/dist/types/filters/removegooglesheetstag.d.ts +18 -0
- package/dist/types/filters/removeinvalidtablewidth.d.ts +18 -0
- package/dist/types/filters/removemsattributes.d.ts +18 -0
- package/dist/types/filters/removestyleblock.d.ts +18 -0
- package/dist/types/filters/removexmlns.d.ts +18 -0
- package/dist/types/filters/space.d.ts +29 -0
- package/dist/types/filters/utils.d.ts +29 -0
- package/dist/types/index.d.ts +16 -0
- package/dist/types/normalizer.d.ts +34 -0
- package/dist/types/normalizers/googledocsnormalizer.d.ts +33 -0
- package/dist/types/normalizers/googlesheetsnormalizer.d.ts +33 -0
- package/dist/types/normalizers/mswordnormalizer.d.ts +31 -0
- package/dist/types/pastefromoffice.d.ts +40 -0
- package/package.json +3 -2
- package/src/index.d.ts +1 -1
package/dist/index.js
ADDED
|
@@ -0,0 +1,1122 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* @license Copyright (c) 2003-2024, CKSource Holding sp. z o.o. All rights reserved.
|
|
3
|
+
* For licensing, see LICENSE.md or https://ckeditor.com/legal/ckeditor-oss-license
|
|
4
|
+
*/
|
|
5
|
+
import { Plugin } from '@ckeditor/ckeditor5-core/dist/index.js';
|
|
6
|
+
import { ClipboardPipeline } from '@ckeditor/ckeditor5-clipboard/dist/index.js';
|
|
7
|
+
import { UpcastWriter, Matcher, ViewDocument, DomConverter } from '@ckeditor/ckeditor5-engine/dist/index.js';
|
|
8
|
+
|
|
9
|
+
/**
 * @license Copyright (c) 2003-2024, CKSource Holding sp. z o.o. All rights reserved.
 * For licensing, see LICENSE.md or https://ckeditor.com/legal/ckeditor-oss-license
 */
/**
 * @module paste-from-office/filters/utils
 */
/**
 * Converts a CSS length expressed in `pt`, `pc`, `in`, `cm` or `mm` to a `px` string.
 *
 * A value that ends with none of those units is returned unchanged.
 *
 * @internal
 * @param value CSS length string, for example `'12pt'`.
 * @returns The length in `px` (formatted by `toPx()`), or the input itself when no known unit matched.
 */
function convertCssLengthToPx(value) {
    // Unit suffix paired with the formula turning a number in that unit into pixels (96px per inch).
    const unitConverters = [
        // 1pt = 1in / 72
        ['pt', (amount) => amount * 96 / 72],
        // 1pc = 12pt = 1in / 6
        ['pc', (amount) => amount * 12 * 96 / 72],
        // 1in = 2.54cm = 96px
        ['in', (amount) => amount * 96],
        // 1cm = 96px / 2.54
        ['cm', (amount) => amount * 96 / 2.54],
        // 1mm = 1cm / 10
        ['mm', (amount) => amount / 10 * 96 / 2.54]
    ];
    for (const [unit, convert] of unitConverters) {
        if (value.endsWith(unit)) {
            return toPx(convert(parseFloat(value)));
        }
    }
    return value;
}
|
|
38
|
+
/**
 * Checks whether the given value is a defined string ending with the `px` unit.
 *
 * @internal
 * @param value CSS length string, or `undefined`.
 * @returns `true` only when `value` is not `undefined` and ends with `'px'`.
 */
function isPx(value) {
    if (value === undefined) {
        return false;
    }
    return value.endsWith('px');
}
|
|
45
|
+
/**
 * Formats a number as a `px` length rounded to at most two decimal places.
 *
 * Trailing zeros of the fraction (and a then-dangling decimal point) are dropped,
 * so `10` becomes `'10px'` and `7.5` becomes `'7.5px'`.
 *
 * @internal
 * @param value Number of pixels.
 * @returns The formatted `px` string.
 */
function toPx(value) {
    const rounded = value.toFixed(2);
    const trimmed = rounded.replace(/\.?0+$/, '');
    return `${trimmed}px`;
}
|
|
52
|
+
|
|
53
|
+
/**
 * Transforms Word specific list-like elements to the semantic HTML lists.
 *
 * Lists in Word are represented by block elements with special attributes like:
 *
 * ```xml
 * <p class=MsoListParagraphCxSpFirst style='mso-list:l1 level1 lfo1'>...</p> // Paragraph based list.
 * <h1 style='mso-list:l0 level1 lfo1'>...</h1> // Heading 1 based list.
 * ```
 *
 * The function walks the items returned by `findAllItemLikeElements()` in order and keeps a stack
 * with one entry per currently open nesting level. Each entry is the item that opened the level,
 * extended with the created `listElement` (OL/UL) and the `listItemElements` (LI) appended to it so far.
 *
 * @param documentFragment The view structure to be transformed (modified in place).
 * @param stylesString Styles from which list-like elements styling will be extracted.
 * @param hasMultiLevelListPlugin Passed through to `createNewEmptyList()`, where it gates
 * adding the `legal-list` class.
 */
function transformListItemLikeElementsIntoLists(documentFragment, stylesString, hasMultiLevelListPlugin) {
    if (!documentFragment.childCount) {
        return;
    }
    const writer = new UpcastWriter(documentFragment.document);
    const itemLikeElements = findAllItemLikeElements(documentFragment, writer);
    if (!itemLikeElements.length) {
        return;
    }
    // Item counters of root-level lists, keyed by `<list id>:<original indent>`. They let a later
    // list with the same key continue the numbering through the `start` index.
    const encounteredLists = {};
    const stack = [];
    for (const itemLikeElement of itemLikeElements) {
        if (itemLikeElement.indent === undefined) {
            // Other blocks in a list item.
            const stackItem = stack.find((entry) => entry.marginLeft === itemLikeElement.marginLeft);
            // This might be a paragraph that has known margin, but it is not a real list block.
            if (stackItem) {
                const listItems = stackItem.listItemElements;
                // Append block to LI.
                writer.appendChild(itemLikeElement.element, listItems[listItems.length - 1]);
                writer.removeStyle('margin-left', itemLikeElement.element);
            } else {
                stack.length = 0;
            }
            continue;
        }
        if (!isListContinuation(itemLikeElement)) {
            stack.length = 0;
        }
        // Combined list ID for addressing encounter lists counters.
        const originalListId = `${itemLikeElement.id}:${itemLikeElement.indent}`;
        // Normalized list item indentation (never deeper than one level below the current stack).
        const indent = Math.min(itemLikeElement.indent - 1, stack.length);
        // Trimming of the list stack on list ID change.
        if (indent < stack.length && stack[indent].id !== itemLikeElement.id) {
            stack.length = indent;
        }
        // Trimming of the list stack on lower indent list encountered.
        if (indent < stack.length - 1) {
            stack.length = indent + 1;
        } else {
            const listStyle = detectListStyle(itemLikeElement, stylesString);
            // Create a new OL/UL if required (greater indent or different list type).
            if (indent > stack.length - 1 || stack[indent].listElement.name !== listStyle.type) {
                // Check if there is some start index to set from a previous list.
                if (indent === 0 && listStyle.type === 'ol' && itemLikeElement.id !== undefined && encounteredLists[originalListId]) {
                    listStyle.startIndex = encounteredLists[originalListId];
                }
                const listElement = createNewEmptyList(listStyle, writer, hasMultiLevelListPlugin);
                // Apply list padding only if we have margins for the item and the parent item.
                if (isPx(itemLikeElement.marginLeft) && (indent === 0 || isPx(stack[indent - 1].marginLeft))) {
                    let marginLeft = itemLikeElement.marginLeft;
                    if (indent > 0) {
                        // Convert the padding from absolute to relative.
                        marginLeft = toPx(parseFloat(marginLeft) - parseFloat(stack[indent - 1].marginLeft));
                    }
                    writer.setStyle('padding-left', marginLeft, listElement);
                }
                // Insert the new OL/UL.
                // A root-level list has no parent list item, so it goes right after the item's element.
                // The check is on `indent` rather than on an empty stack: the stack may still hold
                // a root entry of a different list type, and `stack[indent - 1]` would be undefined.
                if (indent === 0) {
                    const parent = itemLikeElement.element.parent;
                    const index = parent.getChildIndex(itemLikeElement.element) + 1;
                    writer.insertChild(index, listElement, parent);
                } else {
                    const parentListItems = stack[indent - 1].listItemElements;
                    writer.appendChild(listElement, parentListItems[parentListItems.length - 1]);
                }
                // Update the list stack for other items to reference.
                stack[indent] = {
                    ...itemLikeElement,
                    listElement,
                    listItemElements: []
                };
                // Prepare list counter for start index.
                if (indent === 0 && itemLikeElement.id !== undefined) {
                    encounteredLists[originalListId] = listStyle.startIndex || 1;
                }
            }
        }
        // Use LI if it is already it or create a new LI element.
        // https://github.com/ckeditor/ckeditor5/issues/15964
        const listItem = itemLikeElement.element.name === 'li' ? itemLikeElement.element : writer.createElement('li');
        // Append the LI to OL/UL.
        writer.appendChild(listItem, stack[indent].listElement);
        stack[indent].listItemElements.push(listItem);
        // Increment list counter.
        if (indent === 0 && itemLikeElement.id !== undefined) {
            encounteredLists[originalListId]++;
        }
        // Append list block to LI.
        if (itemLikeElement.element !== listItem) {
            writer.appendChild(itemLikeElement.element, listItem);
        }
        // Clean list block.
        removeBulletElement(itemLikeElement.element, writer);
        writer.removeStyle('text-indent', itemLikeElement.element); // #12361
        writer.removeStyle('margin-left', itemLikeElement.element);
    }
}
|
|
164
|
+
/**
 * Unwraps the paragraph that is the first child of a list item.
 *
 * Every `<li>` found in the fragment is inspected; when its first child is a `<p>`,
 * that paragraph is unwrapped with `writer.unwrapElement()`.
 *
 * @param documentFragment The view structure to walk.
 * @param writer Writer used to create the range and to unwrap elements.
 */
function unwrapParagraphInListItem(documentFragment, writer) {
    for (const { item } of writer.createRangeIn(documentFragment)) {
        if (!item.is('element', 'li')) {
            continue;
        }
        // Google Docs allows for single paragraph inside LI.
        const leadingChild = item.getChild(0);
        if (leadingChild && leadingChild.is('element', 'p')) {
            writer.unwrapElement(leadingChild);
        }
    }
}
|
|
178
|
+
/**
 * Collects all list-like elements of the given document fragment, in document order.
 *
 * Only `p`, `h<digits>`, `li` and `div` elements are considered. An element qualifies when it
 * has the `mso-list` style, or when its normalized left margin equals the margin of a qualifying
 * element collected since the last non-qualifying candidate.
 *
 * @param documentFragment Document fragment in which to look for list-like nodes.
 * @param writer Writer used to create the range over the fragment.
 * @returns Array of found list-like items. Each item is an object containing:
 *
 * * element - The list-like view element.
 * * id, order, indent - Values returned by `getListItemData()` for the element (may be undefined).
 * * marginLeft - Normalized `margin-left` of the element, or undefined.
 */
function findAllItemLikeElements(documentFragment, writer) {
    // https://github.com/ckeditor/ckeditor5/issues/15964
    const candidateNamePattern = /^(p|h\d+|li|div)$/;
    const knownMargins = new Set();
    const collected = [];
    for (const node of writer.createRangeIn(documentFragment).getItems()) {
        const isCandidate = node.is('element') && node.name.match(candidateNamePattern);
        if (!isCandidate) {
            continue;
        }
        // Try to rely on margin-left style to find paragraphs visually aligned with previously encountered list item.
        let marginLeft = getMarginLeftNormalized(node);
        // Ignore margin-left 0 style if there is no MsoList... class.
        if (marginLeft !== undefined && parseFloat(marginLeft) === 0) {
            const hasMsoListClass = Array.from(node.getClassNames()).some((className) => className.startsWith('MsoList'));
            if (!hasMsoListClass) {
                marginLeft = undefined;
            }
        }
        const alignsWithKnownItem = marginLeft !== undefined && knownMargins.has(marginLeft);
        if (!node.hasStyle('mso-list') && !alignsWithKnownItem) {
            // Not a list item nor a block following one: forget the margins seen so far.
            knownMargins.clear();
            continue;
        }
        // List item or a following list item block.
        const { id, order, indent } = getListItemData(node);
        collected.push({
            element: node,
            id,
            order,
            indent,
            marginLeft
        });
        if (marginLeft !== undefined) {
            knownMargins.add(marginLeft);
        }
    }
    return collected;
}
|
|
217
|
+
/**
 * Tells whether the given item possibly continues a list: either the element directly before it
 * is a list (the previous item was already wrapped), or it has no previous sibling and its parent is a list.
 *
 * @param currentItem List-like item with the `element` property.
 * @returns Result of `isList()` for the previous sibling, or for the parent when there is no previous sibling.
 */
function isListContinuation(currentItem) {
    const { element } = currentItem;
    // With a previous sibling: even with the same id the list does not have to be continuous (#43).
    // Without one: it may be a li inside ul or ol like in https://github.com/ckeditor/ckeditor5/issues/15964.
    return isList(element.previousSibling || element.parent);
}
|
|
229
|
+
/**
 * Checks whether the given view node is an `<ol>` or `<ul>` element.
 */
function isList(element) {
    return ['ol', 'ul'].some((listName) => element.is('element', listName));
}
|
|
232
|
+
/**
 * Extracts list item style from the provided CSS.
 *
 * List item style is extracted from the CSS stylesheet. Each list with its specific style attribute
 * value (`mso-list:l1 level1 lfo1`) has its dedicated properties in a CSS stylesheet defined with a selector like:
 *
 * ```css
 * @list l1:level1 { ... }
 * ```
 *
 * It contains `mso-level-number-format` property which defines list numbering/bullet style. If this property
 * is not defined it means default `decimal` numbering.
 *
 * Here CSS string representation is used as `mso-level-number-format` property is an invalid CSS property
 * and will be removed during CSS parsing.
 *
 * @param listLikeItem List-like item for which list style will be searched for. Usually
 * a result of `findAllItemLikeElements()` function.
 * @param stylesString CSS stylesheet.
 * @returns An object with properties:
 *
 * * type - List type, could be `ul` or `ol`.
 * * startIndex - List start index (or `null`), read only for lists whose format is not `bullet`.
 * * style - List style, for example: `decimal`, `lower-roman`, etc. It is extracted
 * directly from Word stylesheet and adjusted to represent proper values for the CSS `list-style-type` property.
 * If it cannot be adjusted, the `null` value is returned.
 * * isLegalStyleList - Truthy when the list looks like a Word "legal" list (see the comment in the body).
 */
function detectListStyle(listLikeItem, stylesString) {
    const { id, indent, element } = listLikeItem;
    const levelBlockRegexp = new RegExp(`@list l${id}:level${indent}\\s*({[^}]*)`, 'gi');
    const legalMarkerRegexp = new RegExp(`@list\\s+l${id}:level\\d\\s*{[^{]*mso-level-text:"%\\d\\\\.`, 'gi');
    const anyNumberFormatRegexp = new RegExp(`@list l${id}:level\\d\\s*{[^{]*mso-level-number-format:`, 'gi');
    // Multi level lists in Word have mso-level-number-format attribute except legal lists,
    // so we used that. If list has legal list match and doesn't has mso-level-number-format
    // then this is legal-list.
    const legalMarkerMatch = legalMarkerRegexp.exec(stylesString);
    const anyNumberFormatMatch = anyNumberFormatRegexp.exec(stylesString);
    const isLegalStyleList = legalMarkerMatch && !anyNumberFormatMatch;
    let listStyleType = 'decimal'; // Decimal is default one.
    let type = 'ol'; // <ol> is default list.
    let startIndex = null;
    const levelBlockMatch = levelBlockRegexp.exec(stylesString);
    const declarations = levelBlockMatch && levelBlockMatch[1];
    if (declarations) {
        const numberFormatMatch = /mso-level-number-format:([^;]{0,100});/gi.exec(declarations);
        if (numberFormatMatch && numberFormatMatch[1]) {
            listStyleType = numberFormatMatch[1].trim();
            type = listStyleType === 'bullet' || listStyleType === 'image' ? 'ul' : 'ol';
        }
        // Styles for the numbered lists are always defined in the Word CSS stylesheet.
        // Unordered lists MAY contain a value for the Word CSS definition `mso-level-text` but sometimes
        // this tag is missing. And because of that, we cannot depend on that. We need to predict the list style value
        // based on the list style marker element.
        if (listStyleType === 'bullet') {
            listStyleType = findBulletedListStyle(element) || listStyleType;
        } else {
            const startAtMatch = /mso-level-start-at:\s{0,100}([0-9]{0,10})\s{0,100};/gi.exec(declarations);
            if (startAtMatch && startAtMatch[1]) {
                startIndex = parseInt(startAtMatch[1], 10);
            }
        }
        if (isLegalStyleList) {
            type = 'ol';
        }
    }
    return {
        type,
        startIndex,
        style: mapListStyleDefinition(listStyleType),
        isLegalStyleList
    };
}
|
|
306
|
+
/**
 * Tries to extract the `list-style-type` value for a bulleted list item.
 *
 * For an `<li>` placed in a `<ul>` that has the `type` attribute, that attribute value is returned.
 * Otherwise the marker text found by `findListMarkerNode()` is mapped to a style name.
 *
 * @param element The list-like view element.
 * @returns `'circle'`, `'disc'`, `'square'`, the `type` attribute value, or `null` when nothing matched.
 */
function findBulletedListStyle(element) {
    // https://github.com/ckeditor/ckeditor5/issues/15964
    const parent = element.parent;
    if (element.name === 'li' && parent.name === 'ul' && parent.hasAttribute('type')) {
        return parent.getAttribute('type');
    }
    const markerNode = findListMarkerNode(element);
    if (!markerNode) {
        return null;
    }
    switch (markerNode._data) {
        case 'o':
            return 'circle';
        case '·':
            return 'disc';
        case '§':
            return 'square';
        default:
            return null;
    }
}
|
|
327
|
+
/**
 * Tries to find a text node that represents the marker element (list-style-type).
 *
 * @param element The list-like view element to search in.
 * @returns The first child of the first non-empty `<span>` child when it is a text node, otherwise
 * that child's own first child (which may be undefined). Returns `null` when the element has
 * no children, starts with a text node, or has no non-empty `<span>` child.
 */
function findListMarkerNode(element) {
    const firstChild = element.getChild(0);
    // An element without any children has no marker; without this guard the `is()` call below
    // would throw on `undefined`.
    // If the first child is a text node, it is the data for the element.
    // The list-style marker is not present here.
    if (!firstChild || firstChild.is('$text')) {
        return null;
    }
    for (const childNode of element.getChildren()) {
        // The list-style marker will be inside the `<span>` element. Let's ignore all non-span elements.
        // It may happen that the `<a>` element is added as the first child. Most probably, it's an anchor element.
        if (!childNode.is('element', 'span')) {
            continue;
        }
        const textNodeOrElement = childNode.getChild(0);
        if (!textNodeOrElement) {
            continue;
        }
        // If already found the marker element, use it.
        if (textNodeOrElement.is('$text')) {
            return textNodeOrElement;
        }
        return textNodeOrElement.getChild(0);
    }
    /* istanbul ignore next -- @preserve */ return null;
}
|
|
353
|
+
/**
 * Maps the number/bullet format name taken from the Word CSS stylesheet to a CSS `list-style-type` value.
 *
 * @param value Format name, for example `alpha-upper` or `disc`.
 * @returns The matching `list-style-type` value, or `null` when the format is not recognized.
 */
function mapListStyleDefinition(value) {
    if (value.startsWith('arabic-leading-zero')) {
        return 'decimal-leading-zero';
    }
    const cssStyleByWordFormat = new Map([
        ['alpha-upper', 'upper-alpha'],
        ['alpha-lower', 'lower-alpha'],
        ['roman-upper', 'upper-roman'],
        ['roman-lower', 'lower-roman'],
        // Bullet styles already use CSS names.
        ['circle', 'circle'],
        ['disc', 'disc'],
        ['square', 'square']
    ]);
    return cssStyleByWordFormat.has(value) ? cssStyleByWordFormat.get(value) : null;
}
|
|
376
|
+
/**
 * Creates a new, empty list element (OL/UL) configured from the detected list style.
 *
 * @param listStyle Object with `type`, `style`, `startIndex` and `isLegalStyleList` properties.
 * @param writer Writer used to create and modify the element.
 * @param hasMultiLevelListPlugin When truthy, a legal-style list gets the `legal-list` class.
 * @returns The created list element.
 */
function createNewEmptyList(listStyle, writer, hasMultiLevelListPlugin) {
    const { type, style, startIndex, isLegalStyleList } = listStyle;
    const list = writer.createElement(type);
    // We do not support modifying the marker for a particular list item.
    // Set the value for the `list-style-type` property directly to the list container.
    if (style) {
        writer.setStyle('list-style-type', style, list);
    }
    // Only a start index greater than 1 is written as the `start` attribute.
    if (startIndex && startIndex > 1) {
        writer.setAttribute('start', startIndex, list);
    }
    if (isLegalStyleList && hasMultiLevelListPlugin) {
        writer.addClass('legal-list', list);
    }
    return list;
}
|
|
393
|
+
/**
 * Extracts list item information from Word specific list-like element style:
 *
 * ```
 * `style="mso-list:l1 level1 lfo1"`
 * ```
 *
 * where:
 *
 * ```
 * * `l1` is a list id (however it does not mean this is a continuous list - see #43),
 * * `level1` is a list item indentation level,
 * * `lfo1` is a list insertion order in a document.
 * ```
 *
 * @param element Element from which style data is extracted.
 * @returns An empty object when the element has no `mso-list` style; `{ id, order, indent }` when all
 * three parts are present (`id` and `order` as strings, `indent` as a number); `{ indent: 1 }` otherwise.
 */
function getListItemData(element) {
    const listStyle = element.getStyle('mso-list');
    if (listStyle === undefined) {
        return {};
    }
    const [idMatch, orderMatch, indentMatch] = [
        /(^|\s{1,100})l(\d+)/i,
        /\s{0,100}lfo(\d+)/i,
        /\s{0,100}level(\d+)/i
    ].map((pattern) => listStyle.match(pattern));
    if (!idMatch || !orderMatch || !indentMatch) {
        // Handle empty mso-list style as a marked for default list item.
        return {
            indent: 1
        };
    }
    return {
        id: idMatch[2],
        order: orderMatch[1],
        indent: parseInt(indentMatch[1], 10)
    };
}
|
|
428
|
+
/**
 * Removes the `<span>` elements holding the numbering/bullet marker from the given element.
 *
 * A marker span is a `span` matching the `mso-list: Ignore` style.
 *
 * @param element Element to clean.
 * @param writer Writer used to walk the element and remove the spans.
 */
function removeBulletElement(element, writer) {
    // Matcher for finding `span` elements holding lists numbering/bullets.
    const markerMatcher = new Matcher({
        name: 'span',
        styles: {
            'mso-list': 'Ignore'
        }
    });
    for (const { type, item } of writer.createRangeIn(element)) {
        if (type !== 'elementStart') {
            continue;
        }
        if (markerMatcher.match(item)) {
            writer.remove(item);
        }
    }
}
|
|
445
|
+
/**
 * Returns the `margin-left` style of the element, converted to `px` where `convertCssLengthToPx()` can do it.
 *
 * @param element View element to read the style from.
 * @returns `undefined` when the style is not set, the raw value when it already ends with `px`,
 * otherwise the result of `convertCssLengthToPx()`.
 */
function getMarginLeftNormalized(element) {
    const marginLeft = element.getStyle('margin-left');
    const needsConversion = marginLeft !== undefined && !marginLeft.endsWith('px');
    return needsConversion ? convertCssLengthToPx(marginLeft) : marginLeft;
}
|
|
454
|
+
|
|
455
|
+
/**
 * Replaces source attribute of all `<img>` elements representing regular
 * images (not the Word shapes) with inlined base64 image representation extracted from RTF data.
 *
 * Before the replacement the fragment is cleaned up: `<img>` elements representing Word shapes
 * are removed, missing `<img>` elements are inserted after shapes, and all shape elements are removed.
 *
 * @param documentFragment Document fragment on which transform images (modified in place).
 * @param rtfData The RTF data from which images representation will be used.
 */
function replaceImagesSourceWithBase64(documentFragment, rtfData) {
    if (!documentFragment.childCount) {
        return;
    }
    const writer = new UpcastWriter(documentFragment.document);
    const wordShapeIds = findAllShapesIds(documentFragment, writer);
    removeAllImgElementsRepresentingShapes(wordShapeIds, documentFragment, writer);
    insertMissingImgs(wordShapeIds, documentFragment, writer);
    removeAllShapeElements(documentFragment, writer);
    const localImages = findAllImageElementsWithLocalSource(documentFragment, writer);
    if (!localImages.length) {
        // Nothing points to a local file, so the RTF data does not need to be parsed.
        return;
    }
    const hexSources = extractImageDataFromRtf(rtfData);
    replaceImagesFileSourceWithInlineRepresentation(localImages, hexSources, writer);
}
|
|
475
|
+
/**
 * Converts given HEX string to base64 representation.
 *
 * The string is consumed in two-character pairs, each parsed as one byte. Input that contains
 * no complete pair (for example an empty string) yields an empty string instead of throwing.
 *
 * @internal
 * @param hexString The HEX string to be converted.
 * @returns Base64 representation of a given HEX string.
 */
function _convertHexToBase64(hexString) {
    // `String#match()` with the `g` flag returns `null` (not an empty array) when nothing matches.
    const bytePairs = hexString.match(/\w{2}/g) || [];
    const binary = bytePairs.map((pair) => String.fromCharCode(parseInt(pair, 16))).join('');
    return btoa(binary);
}
|
|
486
|
+
/**
 * Finds ids of all shapes (`<v:*>...</v:*>`) that represent Word shapes. Shapes can represent images (canvas)
 * or Word shapes (which does not have RTF or Blob representation).
 *
 * @param documentFragment Document fragment from which to extract shape ids.
 * @param writer Writer used to create the range over the fragment.
 * @returns Array of shape ids.
 */
function findAllShapesIds(documentFragment, writer) {
    // List of ids which should not be considered as shapes.
    // https://github.com/ckeditor/ckeditor5/pull/15847#issuecomment-1941543983
    const exceptionIds = [
        'Chart'
    ];
    const range = writer.createRangeIn(documentFragment);
    const shapeMatcher = new Matcher({
        name: /v:(.+)/
    });
    const collectedIds = [];
    for (const { type, item } of range) {
        if (type != 'elementStart') {
            continue;
        }
        const previous = item.previousSibling;
        const followsShapeType = Boolean(previous) && previous.is('element') && previous.name === 'v:shapetype';
        const matchesShape = shapeMatcher.match(item);
        const gfxData = item.getAttribute('o:gfxdata');
        const isException = gfxData && exceptionIds.some((fragment) => item.getAttribute('id').includes(fragment));
        // If shape element has 'o:gfxdata' attribute and is not directly before
        // `<v:shapetype>` element it means that it represents a Word shape.
        if (matchesShape && gfxData && !followsShapeType && !isException) {
            collectedIds.push(item.getAttribute('id'));
        }
    }
    return collectedIds;
}
|
|
522
|
+
/**
 * Removes all `<img>` elements which represents Word shapes and not regular images.
 *
 * An `<img>` is removed when every id listed in its `v:shapes` attribute is a known shape id,
 * or when it has no `src`.
 *
 * @param shapesIds Shape ids which will be checked against `<img>` elements.
 * @param documentFragment Document fragment from which to remove `<img>` elements.
 * @param writer Writer used to walk the fragment and remove elements.
 */
function removeAllImgElementsRepresentingShapes(shapesIds, documentFragment, writer) {
    const range = writer.createRangeIn(documentFragment);
    const imgMatcher = new Matcher({
        name: 'img'
    });
    // Removal is deferred until the walk is finished.
    const toRemove = [];
    for (const { item } of range) {
        if (!item.is('element') || !imgMatcher.match(item)) {
            continue;
        }
        const shapesAttribute = item.getAttribute('v:shapes');
        const referencedIds = shapesAttribute ? shapesAttribute.split(' ') : [];
        const referencesOnlyShapes = referencedIds.length && referencedIds.every((id) => shapesIds.includes(id));
        // Shapes may also have empty source while content is paste in some browsers (Safari).
        if (referencesOnlyShapes || !item.getAttribute('src')) {
            toRemove.push(item);
        }
    }
    toRemove.forEach((img) => {
        writer.remove(img);
    });
}
|
|
549
|
+
/**
 * Removes all shape elements (`<v:*>...</v:*>`) so they do not pollute the output structure.
 *
 * @param documentFragment Document fragment from which to remove shape elements.
 * @param writer Writer used to walk the fragment and remove elements.
 */
function removeAllShapeElements(documentFragment, writer) {
    const range = writer.createRangeIn(documentFragment);
    const shapeMatcher = new Matcher({
        name: /v:(.+)/
    });
    // Collect first, remove after the walk is finished.
    const found = [];
    for (const { type, item } of range) {
        if (type != 'elementStart') {
            continue;
        }
        if (shapeMatcher.match(item)) {
            found.push(item);
        }
    }
    found.forEach((shape) => {
        writer.remove(shape);
    });
}
|
|
568
|
+
/**
 * Inserts `img` tags if there is none after a shape.
 *
 * Every `<v:shape>` whose id is not listed in `shapeIds` and for which no `<img>` with a matching
 * `v:shapes` attribute exists under the shape's parent gets a new `<img>` inserted right after it.
 * The image takes `src` from the first child of the shape having one, and `alt` from the shape when set.
 *
 * @param shapeIds Ids of shapes that must not receive an `<img>`.
 * @param documentFragment Document fragment to process.
 * @param writer Writer used to walk the fragment and to create/insert elements.
 */
function insertMissingImgs(shapeIds, documentFragment, writer) {
    // Recursively looks for an `<img v:shapes="id">` among the nodes and their descendants.
    const containsMatchingImg = (nodes, id) => {
        for (const node of nodes) {
            /* istanbul ignore else -- @preserve */ if (node.is('element')) {
                if (node.name == 'img' && node.getAttribute('v:shapes') == id) {
                    return true;
                }
                if (containsMatchingImg(node.getChildren(), id)) {
                    return true;
                }
            }
        }
        return false;
    };
    // Returns `src` of the first child element having one (undefined when there is none).
    const findSrc = (shape) => {
        for (const child of shape.getChildren()) {
            /* istanbul ignore else -- @preserve */ if (child.is('element') && child.getAttribute('src')) {
                return child.getAttribute('src');
            }
        }
    };
    const shapesWithoutImg = [];
    for (const { type, item } of writer.createRangeIn(documentFragment)) {
        if (type != 'elementStart' || !item.is('element', 'v:shape')) {
            continue;
        }
        const id = item.getAttribute('id');
        if (shapeIds.includes(id)) {
            continue;
        }
        if (!containsMatchingImg(item.parent.getChildren(), id)) {
            shapesWithoutImg.push(item);
        }
    }
    for (const shape of shapesWithoutImg) {
        const attributes = {
            src: findSrc(shape)
        };
        if (shape.hasAttribute('alt')) {
            attributes.alt = shape.getAttribute('alt');
        }
        writer.insertChild(shape.index + 1, writer.createElement('img', attributes), shape.parent);
    }
}
|
|
615
|
+
/**
 * Finds all `<img>` elements in a given document fragment which have source pointing to local `file://` resource.
 *
 * Images without the `src` attribute are skipped instead of causing an error.
 *
 * @param documentFragment Document fragment in which to look for `<img>` elements.
 * @param writer Writer used to create the range over the fragment.
 * @returns Array of the found `<img>` elements, in document order.
 */
function findAllImageElementsWithLocalSource(documentFragment, writer) {
    const imgs = [];
    for (const { item } of writer.createRangeIn(documentFragment)) {
        if (!item.is('element', 'img')) {
            continue;
        }
        // `src` may be missing, so it has to be checked before calling a string method on it.
        const src = item.getAttribute('src');
        if (src && src.startsWith('file://')) {
            imgs.push(item);
        }
    }
    return imgs;
}
|
|
635
|
+
/**
 * Extracts all images HEX representations from a given RTF data.
 *
 * Only pictures marked as `\pngblip` or `\jpegblip` are returned; other picture groups are skipped.
 *
 * @param rtfData The RTF data from which to extract images HEX representation.
 * @returns Array of found HEX representations (empty when `rtfData` is falsy or holds no picture).
 * Each array item is an object containing:
 *
 * * hex Image representation in HEX format.
 * * type Type of image, `image/png` or `image/jpeg`.
 */
function extractImageDataFromRtf(rtfData) {
    if (!rtfData) {
        return [];
    }
    const regexPictureHeader = /{\\pict[\s\S]+?\\bliptag-?\d+(\\blipupi-?\d+)?({\\\*\\blipuid\s?[\da-fA-F]+)?[\s}]*?/;
    const regexPicture = new RegExp('(?:(' + regexPictureHeader.source + '))([\\da-fA-F\\s]+)\\}', 'g');
    const pictures = rtfData.match(regexPicture) || [];
    const result = [];
    for (const picture of pictures) {
        let type = null;
        if (picture.includes('\\pngblip')) {
            type = 'image/png';
        } else if (picture.includes('\\jpegblip')) {
            type = 'image/jpeg';
        }
        if (!type) {
            continue;
        }
        // Drop the picture header, then everything that is not a HEX digit (whitespace, braces).
        const hex = picture.replace(regexPictureHeader, '').replace(/[^\da-fA-F]/g, '');
        result.push({
            hex,
            type
        });
    }
    return result;
}
|
|
669
|
+
/**
 * Swaps the `src` attribute of every given image for a base64 `data:` URL built
 * from the HEX source found at the same index.
 *
 * Images and HEX sources are paired purely by position, so nothing is changed
 * unless both arrays have the same length.
 *
 * @param imageElements Array of image elements which will have its source replaced.
 * @param imagesHexSources Array of images hex sources (usually the result of `extractImageDataFromRtf()` function).
 * The array should be the same length as `imageElements` parameter.
 * @param writer Writer used to set the new `src` attribute.
 */
function replaceImagesFileSourceWithInlineRepresentation(imageElements, imagesHexSources, writer) {
    if (imageElements.length !== imagesHexSources.length) {
        return;
    }
    for (const [index, imageElement] of imageElements.entries()) {
        const hexSource = imagesHexSources[index];
        const inlineSource = `data:${hexSource.type};base64,${_convertHexToBase64(hexSource.hex)}`;
        writer.setAttribute('src', inlineSource, imageElement);
    }
}
|
|
684
|
+
|
|
685
|
+
/**
 * Cleans up MS Office leftovers in the pasted content:
 *
 * * removes every class and style whose name matches `/\bmso/gi`,
 * * unwraps `<w:sdt>` elements as well as empty `<w:sdtpr>` and `<o:p>` elements
 *   (their children, if any, take their place in the parent).
 *
 * @param documentFragment element `data.content` obtained from clipboard.
 */
function removeMSAttributes(documentFragment) {
    const writer = new UpcastWriter(documentFragment.document);
    const wrappersToUnwrap = [];
    // A fresh regular expression is created on every call, so the `g` flag carries no state over.
    const isMsoName = (name) => /\bmso/gi.exec(name);
    for (const { item: node } of writer.createRangeIn(documentFragment)) {
        if (!node.is('element')) {
            continue;
        }
        for (const className of node.getClassNames()) {
            if (isMsoName(className)) {
                writer.removeClass(className, node);
            }
        }
        for (const styleName of node.getStyleNames()) {
            if (isMsoName(styleName)) {
                writer.removeStyle(styleName, node);
            }
        }
        const isOfficeWrapper = node.is('element', 'w:sdt') ||
            (node.is('element', 'w:sdtpr') && node.isEmpty) ||
            (node.is('element', 'o:p') && node.isEmpty);
        if (isOfficeWrapper) {
            wrappersToUnwrap.push(node);
        }
    }
    // Unwrapping is deferred until the walk is over so the range is not modified while it is iterated.
    for (const wrapper of wrappersToUnwrap) {
        const wrapperParent = wrapper.parent;
        const wrapperIndex = wrapperParent.getChildIndex(wrapper);
        writer.insertChild(wrapperIndex, wrapper.getChildren(), wrapperParent);
        writer.remove(wrapper);
    }
}
|
|
717
|
+
|
|
718
|
+
// Content is recognized as coming from MS Word either by the "generator" meta tag...
const msWordMatch1 = /<meta\s*name="?generator"?\s*content="?microsoft\s*word\s*\d+"?\/?>/i;
// ...or by the Office XML namespace declaration.
const msWordMatch2 = /xmlns:o="urn:schemas-microsoft-com/i;
/**
 * Normalizer for the content pasted from Microsoft Word.
 */
class MSWordNormalizer {
    /**
     * Creates a new `MSWordNormalizer` instance.
     *
     * @param document View document.
     * @param hasMultiLevelListPlugin Flag forwarded to the list transformation; `false` by default.
     */
    constructor(document, hasMultiLevelListPlugin = false) {
        this.document = document;
        this.hasMultiLevelListPlugin = hasMultiLevelListPlugin;
    }
    /**
     * @inheritDoc
     */
    isActive(htmlString) {
        return [msWordMatch1, msWordMatch2].some((pattern) => pattern.test(htmlString));
    }
    /**
     * @inheritDoc
     */
    execute(data) {
        const parsed = data._parsedData;
        const documentFragment = parsed.body;
        // The order matters: lists first, then images (needs the RTF flavour), then the attribute cleanup.
        transformListItemLikeElementsIntoLists(documentFragment, parsed.stylesString, this.hasMultiLevelListPlugin);
        replaceImagesSourceWithBase64(documentFragment, data.dataTransfer.getData('text/rtf'));
        removeMSAttributes(documentFragment);
        data.content = documentFragment;
    }
}
|
|
744
|
+
|
|
745
|
+
/**
 * @license Copyright (c) 2003-2024, CKSource Holding sp. z o.o. All rights reserved.
 * For licensing, see LICENSE.md or https://ckeditor.com/legal/ckeditor-oss-license
 */
/**
 * Removes the `<b>` tag wrapper added by Google Docs to a copied content.
 *
 * Every direct child of `documentFragment` that is a `<b>` element with the
 * `font-weight: normal` style is replaced, in place, by its own children.
 *
 * @param documentFragment element `data.content` obtained from clipboard
 * @param writer Writer used to remove the wrapper and re-insert its children.
 */
function removeBoldWrapper(documentFragment, writer) {
    // Iterate over a snapshot: the loop body changes the fragment's child list, and
    // walking the live iterator would skip the sibling that follows an empty wrapper.
    for (const child of Array.from(documentFragment.getChildren())) {
        if (child.is('element', 'b') && child.getStyle('font-weight') === 'normal') {
            const childIndex = documentFragment.getChildIndex(child);
            writer.remove(child);
            writer.insertChild(childIndex, child.getChildren(), documentFragment);
        }
    }
}
|
|
761
|
+
|
|
762
|
+
/**
 * Turns `<br>` elements that neighbour a block element into paragraphs.
 *
 * A `<br>` carrying the `Apple-interchange-newline` class is removed instead of
 * being replaced.
 *
 * @param documentFragment The view structure to be transformed.
 * @param writer Writer used to walk and modify `documentFragment`.
 */
function transformBlockBrsToParagraphs(documentFragment, writer) {
    const converter = new DomConverter(new ViewDocument(writer.document.stylesProcessor), {
        renderingMode: 'data'
    });
    const { blockElements, inlineObjectElements } = converter;
    const siblingOf = (br, direction) => findSibling(br, direction, writer, {
        blockElements,
        inlineObjectElements
    });
    const brsNextToBlocks = [];
    for (const { item } of writer.createRangeIn(documentFragment)) {
        if (!item.is('element', 'br')) {
            continue;
        }
        const following = siblingOf(item, 'forward');
        const preceding = siblingOf(item, 'backward');
        const followedByBlock = isBlockViewElement(following, blockElements);
        const precededByBlock = isBlockViewElement(preceding, blockElements);
        // If the <br> is surrounded by blocks then convert it to a paragraph:
        // * <p>foo</p>[<br>]<p>bar</p> -> <p>foo</p>[<p></p>]<p>bar</p>
        // * <p>foo</p>[<br>] -> <p>foo</p>[<p></p>]
        // * [<br>]<p>foo</p> -> [<p></p>]<p>foo</p>
        if (precededByBlock || followedByBlock) {
            brsNextToBlocks.push(item);
        }
    }
    // The tree is modified only after the walk over the range has finished.
    for (const br of brsNextToBlocks) {
        if (br.hasClass('Apple-interchange-newline')) {
            writer.remove(br);
            continue;
        }
        writer.replace(br, writer.createElement('p'));
    }
}
|
|
804
|
+
/**
 * Returns the sibling node in the given direction. Inline elements are treated as
 * transparent (the search walks through them), but it stops on inline objects.
 *
 * @param viewElement Element next to which the sibling is searched for.
 * @param direction Either `'forward'` or `'backward'`.
 * @param writer Writer used to create the starting position.
 * @param options.blockElements Names of block elements.
 * @param options.inlineObjectElements Names of inline object elements.
 */
function findSibling(viewElement, direction, writer, { blockElements, inlineObjectElements }) {
    const lookingForward = direction == 'forward';
    // Matches plain inline (non-object) elements like span, strong, etc. - the ones to be walked through.
    const isTransparentInline = ({ item }) => item.is('element') &&
        !blockElements.includes(item.name) &&
        !inlineObjectElements.includes(item.name);
    const startPosition = writer.createPositionAt(viewElement, lookingForward ? 'after' : 'before');
    // Find first position that is just before a first:
    // * text node,
    // * block element,
    // * inline object element.
    const siblingPosition = startPosition.getLastMatchingPosition(isTransparentInline, {
        direction
    });
    return lookingForward ? siblingPosition.nodeAfter : siblingPosition.nodeBefore;
}
|
|
818
|
+
/**
 * Tells whether `node` is an element whose name is listed in `blockElements`.
 *
 * @param node Node to check; may be `null` or `undefined`.
 * @param blockElements Names of the elements regarded as blocks.
 */
function isBlockViewElement(node, blockElements) {
    if (!node) {
        return false;
    }
    return node.is('element') && blockElements.includes(node.name);
}
|
|
823
|
+
|
|
824
|
+
// Google Docs marks the copied content with an element whose id starts with `docs-internal-guid-`.
const googleDocsMatch = /id=("|')docs-internal-guid-[-0-9a-f]+("|')/i;
/**
 * Normalizer for the content pasted from Google Docs.
 */
class GoogleDocsNormalizer {
    /**
     * Creates a new `GoogleDocsNormalizer` instance.
     *
     * @param document View document.
     */
    constructor(document) {
        this.document = document;
    }
    /**
     * @inheritDoc
     */
    isActive(htmlString) {
        return googleDocsMatch.test(htmlString);
    }
    /**
     * @inheritDoc
     */
    execute(data) {
        const writer = new UpcastWriter(this.document);
        const documentFragment = data._parsedData.body;
        // The filters are applied one after another to the same fragment, in this order.
        const filters = [removeBoldWrapper, unwrapParagraphInListItem, transformBlockBrsToParagraphs];
        for (const applyFilter of filters) {
            applyFilter(documentFragment, writer);
        }
        data.content = documentFragment;
    }
}
|
|
849
|
+
|
|
850
|
+
/**
 * @license Copyright (c) 2003-2024, CKSource Holding sp. z o.o. All rights reserved.
 * For licensing, see LICENSE.md or https://ckeditor.com/legal/ckeditor-oss-license
 */
/**
 * Strips the `xmlns` attribute from every `<table>` that is a direct child of
 * the fragment (as produced when pasting from Google Sheets).
 *
 * @param documentFragment element `data.content` obtained from clipboard
 * @param writer Writer used to remove the attribute.
 */
function removeXmlns(documentFragment, writer) {
    for (const node of documentFragment.getChildren()) {
        const isNamespacedTable = node.is('element', 'table') && node.hasAttribute('xmlns');
        if (!isNamespacedTable) {
            continue;
        }
        writer.removeAttribute('xmlns', node);
    }
}
|
|
864
|
+
|
|
865
|
+
/**
 * @license Copyright (c) 2003-2024, CKSource Holding sp. z o.o. All rights reserved.
 * For licensing, see LICENSE.md or https://ckeditor.com/legal/ckeditor-oss-license
 */
/**
 * Removes the `<google-sheets-html-origin>` tag wrapper added by Google Sheets to a copied content.
 *
 * Every direct child of `documentFragment` that is such a wrapper is replaced,
 * in place, by its own children.
 *
 * @param documentFragment element `data.content` obtained from clipboard
 * @param writer Writer used to remove the wrapper and re-insert its children.
 */
function removeGoogleSheetsTag(documentFragment, writer) {
    // Iterate over a snapshot: the loop body changes the fragment's child list, and
    // walking the live iterator would skip the sibling that follows an empty wrapper.
    for (const child of Array.from(documentFragment.getChildren())) {
        if (child.is('element', 'google-sheets-html-origin')) {
            const childIndex = documentFragment.getChildIndex(child);
            writer.remove(child);
            writer.insertChild(childIndex, child.getChildren(), documentFragment);
        }
    }
}
|
|
881
|
+
|
|
882
|
+
/**
 * @license Copyright (c) 2003-2024, CKSource Holding sp. z o.o. All rights reserved.
 * For licensing, see LICENSE.md or https://ckeditor.com/legal/ckeditor-oss-license
 */
/**
 * Drops the `width` style from every `<table>` that is a direct child of the
 * fragment and has it set to exactly `0px` (as produced when pasting from Google Sheets).
 *
 * @param documentFragment element `data.content` obtained from clipboard
 * @param writer Writer used to remove the style.
 */
function removeInvalidTableWidth(documentFragment, writer) {
    for (const node of documentFragment.getChildren()) {
        const isZeroWidthTable = node.is('element', 'table') && node.getStyle('width') === '0px';
        if (!isZeroWidthTable) {
            continue;
        }
        writer.removeStyle('width', node);
    }
}
|
|
896
|
+
|
|
897
|
+
/**
 * @license Copyright (c) 2003-2024, CKSource Holding sp. z o.o. All rights reserved.
 * For licensing, see LICENSE.md or https://ckeditor.com/legal/ckeditor-oss-license
 */
/**
 * Removes every `<style>` element that is a direct child of the fragment
 * (Google Sheets adds such a block to a copied content).
 *
 * @param documentFragment element `data.content` obtained from clipboard
 * @param writer Writer used to remove the elements.
 */
function removeStyleBlock(documentFragment, writer) {
    // The children are copied first because removing nodes changes the fragment's child list.
    const topLevelNodes = [...documentFragment.getChildren()];
    for (const node of topLevelNodes) {
        if (!node.is('element', 'style')) {
            continue;
        }
        writer.remove(node);
    }
}
|
|
911
|
+
|
|
912
|
+
// Google Sheets wraps the copied content in a `<google-sheets-html-origin>` element.
const googleSheetsMatch = /<google-sheets-html-origin/i;
/**
 * Normalizer for the content pasted from Google Sheets.
 */
class GoogleSheetsNormalizer {
    /**
     * Creates a new `GoogleSheetsNormalizer` instance.
     *
     * @param document View document.
     */
    constructor(document) {
        this.document = document;
    }
    /**
     * @inheritDoc
     */
    isActive(htmlString) {
        return googleSheetsMatch.test(htmlString);
    }
    /**
     * @inheritDoc
     */
    execute(data) {
        const writer = new UpcastWriter(this.document);
        const documentFragment = data._parsedData.body;
        // The filters are applied one after another to the same fragment, in this order.
        const filters = [removeGoogleSheetsTag, removeXmlns, removeInvalidTableWidth, removeStyleBlock];
        for (const applyFilter of filters) {
            applyFilter(documentFragment, writer);
        }
        data.content = documentFragment;
    }
}
|
|
938
|
+
|
|
939
|
+
/**
 * @license Copyright (c) 2003-2024, CKSource Holding sp. z o.o. All rights reserved.
 * For licensing, see LICENSE.md or https://ckeditor.com/legal/ckeditor-oss-license
 */
/**
 * @module paste-from-office/filters/space
 */
/**
 * Replaces last space preceding elements closing tag with a non-breaking space (`\u00A0`).
 * Such operation prevents spaces from being removed during further DOM/View processing
 * (see especially {@link module:engine/view/domconverter~DomConverter#_processDomInlineNodes}).
 * This method also takes into account Word specific `<o:p></o:p>` empty tags.
 * Additionally multiline sequences of spaces and new lines between tags are removed (see #39 and #40).
 *
 * @param htmlString HTML string in which spacing should be normalized.
 * @returns Input HTML with spaces normalized.
 */
function normalizeSpacing(htmlString) {
    // Run normalizeSafariSpaceSpans() two times to cover nested spans.
    return normalizeSafariSpaceSpans(normalizeSafariSpaceSpans(htmlString))
        // Remove all \r\n from "spacerun spans" so the last replace line doesn't strip all whitespaces.
        .replace(/(<span\s+style=['"]mso-spacerun:yes['"]>[^\S\r\n]*?)[\r\n]+([^\S\r\n]*<\/span>)/g, '$1$2')
        // Drop "spacerun spans" that ended up empty.
        .replace(/<span\s+style=['"]mso-spacerun:yes['"]><\/span>/g, '')
        // A "letter-spacing span" holding only line breaks stands for a single space.
        .replace(/(<span\s+style=['"]letter-spacing:[^'"]+?['"]>)[\r\n]+(<\/span>)/g, '$1 $2')
        // Protect the last space before a closing tag (and before an empty <o:p>).
        .replace(/ <\//g, '\u00A0</')
        .replace(/ <o:p><\/o:p>/g, '\u00A0<o:p></o:p>')
        // Remove <o:p> block filler from empty paragraph. The filler is either the `&nbsp;`
        // entity or - in Safari - the \u00A0 character itself. A plain space is kept in the
        // alternation only for backward compatibility.
        .replace(/<o:p>(&nbsp;|\u00A0| )<\/o:p>/g, '')
        // Remove all whitespaces when they contain any \r or \n.
        .replace(/>([^\S\r\n]*[\r\n]\s*)</g, '><');
}
|
|
959
|
+
/**
 * Normalizes spacing in special Word `spacerun spans` (`<span style='mso-spacerun:yes'>\s+</span>`) by replacing
 * their text with an alternating sequence of non-breaking and regular spaces of the same length
 * (starting with a non-breaking one). This prevents spaces from being removed during further DOM/View processing
 * (see especially {@link module:engine/view/domconverter~DomConverter#_processDomInlineNodes}).
 *
 * @param htmlDocument Native `Document` object in which spacing should be normalized.
 */
function normalizeSpacerunSpans(htmlDocument) {
    for (const span of Array.from(htmlDocument.querySelectorAll('span[style*=spacerun]'))) {
        const length = span.innerText.length || 0;
        // `length` pairs are twice as long as needed; keep only the first `length` characters.
        span.innerText = '\u00A0 '.repeat(length).slice(0, length);
    }
}
|
|
972
|
+
/**
 * Normalizes specific spacing generated by Safari when content pasted from Word (`<span class="Apple-converted-space"> </span>`).
 * A span holding a single whitespace character becomes one regular space; a longer whitespace sequence becomes an
 * alternating sequence of non-breaking and regular spaces of the same length. This prevents spaces from being removed
 * during further DOM/View processing (see especially {@link module:engine/view/domconverter~DomConverter#_processDataFromDomText}).
 *
 * This function is similar to {@link module:clipboard/utils/normalizeclipboarddata normalizeClipboardData util} but uses
 * regular spaces / non-breaking space sequence for replacement.
 *
 * @param htmlString HTML string in which spacing should be normalized
 * @returns Input HTML with spaces normalized.
 */
function normalizeSafariSpaceSpans(htmlString) {
    const toPlainSpacing = (fullMatch, spaces) => {
        const count = spaces.length;
        if (count === 1) {
            return ' ';
        }
        return '\u00A0 '.repeat(count).slice(0, count);
    };
    return htmlString.replace(/<span(?: class="Apple-converted-space"|)>(\s+)<\/span>/g, toPlainSpacing);
}
|
|
987
|
+
|
|
988
|
+
/**
 * Parses the provided HTML extracting contents of `<body>` and `<style>` tags.
 *
 * @param htmlString HTML string to be parsed.
 * @param stylesProcessor Styles processor forwarded to the view conversion.
 * @returns An object with the body as a view (`body`), the body as a string (`bodyString`)
 * and the extracted stylesheets (`styles`, `stylesString`).
 */
function parseHtml(htmlString, stylesProcessor) {
    const domParser = new DOMParser();
    const withoutWordArtifacts = htmlString
        // Remove Word specific "if comments" so content inside is not omitted by the parser.
        .replace(/<!--\[if gte vml 1]>/g, '')
        // Clean the <head> section of MS Windows specific tags. See https://github.com/ckeditor/ckeditor5/issues/15333.
        // The regular expression matches the <o:SmartTagType> tag with optional attributes (with or without values).
        .replace(/<o:SmartTagType(?:\s+[^\s>=]+(?:="[^"]*")?)*\s*\/?>/gi, '');
    const normalizedHtml = normalizeSpacing(cleanContentAfterBody(withoutWordArtifacts));
    // Parse the normalized HTML as a native Document object.
    const htmlDocument = domParser.parseFromString(normalizedHtml, 'text/html');
    normalizeSpacerunSpans(htmlDocument);
    // Read `innerHTML` before anything else as transforming to View modifies the source document.
    const bodyString = htmlDocument.body.innerHTML;
    const body = documentToView(htmlDocument, stylesProcessor);
    const extractedStyles = extractStyles(htmlDocument);
    return {
        body,
        bodyString,
        styles: extractedStyles.styles,
        stylesString: extractedStyles.stylesString
    };
}
|
|
1016
|
+
/**
 * Transforms native `Document` object into {@link module:engine/view/documentfragment~DocumentFragment}. Comments are skipped.
 *
 * Note that all nodes are moved out of `htmlDocument.body` in the process.
 *
 * @param htmlDocument Native `Document` object to be transformed.
 * @param stylesProcessor Styles processor used to create the view document.
 */
function documentToView(htmlDocument, stylesProcessor) {
    const converter = new DomConverter(new ViewDocument(stylesProcessor), {
        renderingMode: 'data'
    });
    const domFragment = htmlDocument.createDocumentFragment();
    const bodyNodes = htmlDocument.body.childNodes;
    // Appending a node to the fragment takes it out of the body, so the first node is moved over and over
    // until the list is exhausted.
    while (bodyNodes.length > 0) {
        const firstNode = bodyNodes[0];
        domFragment.appendChild(firstNode);
    }
    return converter.domToView(domFragment, {
        skipComments: true
    });
}
|
|
1034
|
+
/**
 * Extracts both `CSSStyleSheet` and string representation from all `style` elements available in a provided `htmlDocument`.
 *
 * Only `style` elements whose sheet contains at least one CSS rule are taken into account.
 *
 * @param htmlDocument Native `Document` object from which styles will be extracted.
 * @returns An object with the sheets (`styles`) and their sources joined with a space (`stylesString`).
 */
function extractStyles(htmlDocument) {
    const hasRules = ({ sheet }) => sheet && sheet.cssRules && sheet.cssRules.length;
    const styleTagsWithRules = Array.from(htmlDocument.getElementsByTagName('style')).filter(hasRules);
    return {
        styles: styleTagsWithRules.map((styleTag) => styleTag.sheet),
        stylesString: styleTagsWithRules.map((styleTag) => styleTag.innerHTML).join(' ')
    };
}
|
|
1053
|
+
/**
 * Removes leftover content from between closing </body> and closing </html> tag:
 *
 * ```html
 * <html><body><p>Foo Bar</p></body><span>Fo</span></html> -> <html><body><p>Foo Bar</p></body></html>
 * ```
 *
 * This function is used as specific browsers (Edge) add some random content after `body` tag when pasting from Word.
 * When there is no `</body>` the input is returned untouched; when there is no `</html>` after it,
 * everything that follows `</body>` is dropped.
 *
 * @param htmlString The HTML string to be cleaned.
 * @returns The HTML string with leftover content removed.
 */
function cleanContentAfterBody(htmlString) {
    const bodyCloseTag = '</body>';
    const htmlCloseTag = '</html>';
    const bodyCloseStart = htmlString.indexOf(bodyCloseTag);
    if (bodyCloseStart < 0) {
        return htmlString;
    }
    const bodyCloseEnd = bodyCloseStart + bodyCloseTag.length;
    const htmlCloseStart = htmlString.indexOf(htmlCloseTag, bodyCloseEnd);
    const upToBodyEnd = htmlString.slice(0, bodyCloseEnd);
    if (htmlCloseStart < 0) {
        return upToBodyEnd;
    }
    return upToBodyEnd + htmlString.slice(htmlCloseStart);
}
|
|
1073
|
+
|
|
1074
|
+
/**
 * The Paste from Office plugin.
 *
 * It listens to the `inputTransformation` event of the clipboard pipeline and, when the pasted HTML is
 * recognized by one of the normalizers (MS Word, Google Docs, Google Sheets), lets that normalizer
 * transform the pasted data.
 */
class PasteFromOffice extends Plugin {
    /**
     * @inheritDoc
     */
    static get pluginName() {
        return 'PasteFromOffice';
    }
    /**
     * @inheritDoc
     */
    static get requires() {
        return [ClipboardPipeline];
    }
    /**
     * @inheritDoc
     */
    init() {
        const { editor } = this;
        const clipboardPipeline = editor.plugins.get('ClipboardPipeline');
        const viewDocument = editor.editing.view.document;
        // The first normalizer that recognizes the content wins, so the order matters.
        const normalizers = [
            new MSWordNormalizer(viewDocument, editor.plugins.has('MultiLevelList')),
            new GoogleDocsNormalizer(viewDocument),
            new GoogleSheetsNormalizer(viewDocument)
        ];
        const normalizePastedData = (evt, data) => {
            // The data has already been handled by this plugin.
            if (data._isTransformedWithPasteFromOffice) {
                return;
            }
            // Content pasted into a code block is left untouched.
            const selectionParent = editor.model.document.selection.getFirstPosition().parent;
            if (selectionParent.is('element', 'codeBlock')) {
                return;
            }
            const htmlString = data.dataTransfer.getData('text/html');
            const activeNormalizer = normalizers.find((normalizer) => normalizer.isActive(htmlString));
            if (!activeNormalizer) {
                return;
            }
            // Parse only once, even if the parsed data was already provided on the event data.
            if (!data._parsedData) {
                data._parsedData = parseHtml(htmlString, viewDocument.stylesProcessor);
            }
            activeNormalizer.execute(data);
            data._isTransformedWithPasteFromOffice = true;
        };
        clipboardPipeline.on('inputTransformation', normalizePastedData, {
            priority: 'high'
        });
    }
}
|
|
1120
|
+
|
|
1121
|
+
export { MSWordNormalizer, PasteFromOffice, parseHtml };
|
|
1122
|
+
//# sourceMappingURL=index.js.map
|