@ckeditor/ckeditor5-paste-from-office 40.0.0 → 40.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +12 -12
- package/LICENSE.md +2 -2
- package/build/paste-from-office.js +1 -1
- package/package.json +2 -2
- package/src/augmentation.d.ts +10 -10
- package/src/augmentation.js +5 -5
- package/src/filters/br.d.ts +14 -14
- package/src/filters/br.js +65 -65
- package/src/filters/image.d.ts +24 -24
- package/src/filters/image.js +241 -241
- package/src/filters/list.d.ts +26 -26
- package/src/filters/list.js +395 -395
- package/src/filters/parse.d.ts +35 -35
- package/src/filters/parse.js +96 -93
- package/src/filters/removeboldwrapper.d.ts +14 -14
- package/src/filters/removeboldwrapper.js +18 -18
- package/src/filters/removegooglesheetstag.d.ts +14 -14
- package/src/filters/removegooglesheetstag.js +18 -18
- package/src/filters/removeinvalidtablewidth.d.ts +14 -14
- package/src/filters/removeinvalidtablewidth.js +16 -16
- package/src/filters/removemsattributes.d.ts +14 -0
- package/src/filters/removemsattributes.js +41 -0
- package/src/filters/removestyleblock.d.ts +14 -14
- package/src/filters/removestyleblock.js +16 -16
- package/src/filters/removexmlns.d.ts +14 -14
- package/src/filters/removexmlns.js +16 -16
- package/src/filters/space.d.ts +25 -25
- package/src/filters/space.js +60 -60
- package/src/index.d.ts +12 -12
- package/src/index.js +11 -11
- package/src/normalizer.d.ts +30 -30
- package/src/normalizer.js +5 -5
- package/src/normalizers/googledocsnormalizer.d.ts +29 -29
- package/src/normalizers/googledocsnormalizer.js +42 -42
- package/src/normalizers/googlesheetsnormalizer.d.ts +29 -29
- package/src/normalizers/googlesheetsnormalizer.js +44 -44
- package/src/normalizers/mswordnormalizer.d.ts +26 -26
- package/src/normalizers/mswordnormalizer.js +41 -39
- package/src/pastefromoffice.d.ts +36 -36
- package/src/pastefromoffice.js +70 -70
- package/build/paste-from-office.js.map +0 -1
package/src/filters/list.js
CHANGED
|
@@ -1,395 +1,395 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* @license Copyright (c) 2003-2023, CKSource Holding sp. z o.o. All rights reserved.
|
|
3
|
-
* For licensing, see LICENSE.md or https://ckeditor.com/legal/ckeditor-oss-license
|
|
4
|
-
*/
|
|
5
|
-
/**
|
|
6
|
-
* @module paste-from-office/filters/list
|
|
7
|
-
*/
|
|
8
|
-
import { Matcher, UpcastWriter } from 'ckeditor5/src/engine';
|
|
9
|
-
/**
 * Converts Word's flat, list-like blocks into nested semantic `<ol>`/`<ul>` structures.
 *
 * Word does not emit real list markup; list items are ordinary blocks carrying an `mso-list` style:
 *
 * ```xml
 * <p class=MsoListParagraphCxSpFirst style='mso-list:l1 level1 lfo1'>...</p> // Paragraph based list.
 * <h1 style='mso-list:l0 level1 lfo1'>...</h1> // Heading 1 based list.
 * ```
 *
 * The view structure is modified through an `UpcastWriter`; the function returns nothing.
 *
 * @param documentFragment The view structure to be transformed.
 * @param stylesString Styles from which list-like elements styling will be extracted.
 */
export function transformListItemLikeElementsIntoLists(documentFragment, stylesString) {
    if (!documentFragment.childCount) {
        return;
    }

    const writer = new UpcastWriter(documentFragment.document);
    const items = findAllItemLikeElements(documentFragment, writer);

    if (!items.length) {
        return;
    }

    // The list that receives the next item, and the nesting depth that list sits at.
    let activeList = null;
    let activeDepth = 1;

    for (let index = 0; index < items.length; index++) {
        const item = items[index];
        const startsNewList = isNewListNeeded(items[index - 1], item);
        // When a new list starts, the previous item is ignored for the indentation comparison.
        const indentDelta = getIndentationDifference(startsNewList ? null : items[index - 1], item);

        if (startsNewList) {
            activeList = null;
            activeDepth = 1;
        }

        if (!activeList || indentDelta !== 0) {
            const listStyle = detectListStyle(item, stylesString);

            if (!activeList) {
                activeList = insertNewEmptyList(listStyle, item.element, writer);
            } else if (item.indent > activeDepth) {
                // Deeper item: open a nested list right after the last child of the last list item.
                const lastItem = activeList.getChild(activeList.childCount - 1);
                const lastItemChild = lastItem.getChild(lastItem.childCount - 1);

                activeList = insertNewEmptyList(listStyle, lastItemChild, writer);
                activeDepth += 1;
            } else if (item.indent < activeDepth) {
                // Shallower item: climb back up by the number of levels that were closed.
                activeList = findParentListAtLevel(activeList, activeDepth - item.indent);
                activeDepth = item.indent;
            }

            // Make sure the list that receives the item has the element name the detected style asks for.
            if (item.indent <= activeDepth && !activeList.is('element', listStyle.type)) {
                activeList = writer.rename(listStyle.type, activeList);
            }
        }

        writer.appendChild(transformElementIntoListItem(item.element, writer), activeList);
    }
}
|
|
67
|
-
/**
 * Strips the paragraph that wraps the content of a list item.
 *
 * Every value yielded by `writer.createRangeIn( documentFragment )` is inspected; when its item is an
 * `<li>` whose first child is a `<p>`, that paragraph is unwrapped through the writer.
 *
 * @param documentFragment The view structure to scan.
 * @param writer Writer used both to create the range and to unwrap the paragraphs.
 */
export function unwrapParagraphInListItem(documentFragment, writer) {
    for (const { item } of writer.createRangeIn(documentFragment)) {
        if (!item.is('element', 'li')) {
            continue;
        }

        // Google Docs allows for single paragraph inside LI.
        const leadingChild = item.getChild(0);

        if (leadingChild && leadingChild.is('element', 'p')) {
            writer.unwrapElement(leadingChild);
        }
    }
}
|
|
82
|
-
/**
 * Finds all list-like elements in a given document fragment.
 *
 * An element qualifies when its name is exactly `p` or `h` followed by digits and it carries
 * an `mso-list` style (with any value).
 *
 * @param documentFragment Document fragment in which to look for list-like nodes.
 * @param writer Writer used to create the range spanning the fragment.
 * @returns Array of found list-like items. Each item is an object containing:
 *
 * * element - The matched list-like element.
 * * id - The `id` value returned by `getListItemData()` for the element.
 * * order - The `order` value returned by `getListItemData()` for the element.
 * * indent - The `indent` value returned by `getListItemData()` for the element.
 */
function findAllItemLikeElements(documentFragment, writer) {
    const range = writer.createRangeIn(documentFragment);

    // Matcher for finding list-like elements. The alternation is grouped so that both anchors apply
    // to each branch; the ungrouped form (`^p|h\d+$`) also accepts any name starting with `p`, e.g. `pre`.
    const itemLikeElementsMatcher = new Matcher({
        name: /^(?:p|h\d+)$/,
        styles: {
            'mso-list': /.*/
        }
    });
    const itemLikeElements = [];

    for (const value of range) {
        if (value.type === 'elementStart' && itemLikeElementsMatcher.match(value.item)) {
            const itemData = getListItemData(value.item);

            itemLikeElements.push({
                element: value.item,
                id: itemData.id,
                order: itemData.order,
                indent: itemData.indent
            });
        }
    }

    return itemLikeElements;
}
|
|
111
|
-
/**
 * Extracts list item style from the provided CSS.
 *
 * List item style is extracted from the CSS stylesheet. Each list with its specific style attribute
 * value (`mso-list:l1 level1 lfo1`) has its dedicated properties in a CSS stylesheet defined with a selector like:
 *
 * ```css
 * @list l1:level1 { ... }
 * ```
 *
 * It contains `mso-level-number-format` property which defines list numbering/bullet style. If this property
 * is not defined it means default `decimal` numbering.
 *
 * Here CSS string representation is used as `mso-level-number-format` property is an invalid CSS property
 * and will be removed during CSS parsing.
 *
 * @param listLikeItem List-like item for which list style will be searched for. Usually
 * a result of `findAllItemLikeElements()` function.
 * @param stylesString CSS stylesheet.
 * @returns An object with properties:
 *
 * * type - List type, could be `ul` or `ol`.
 * * startIndex - List start index, valid only for ordered lists. `null` when no start index was found.
 * * style - List style, for example: `decimal`, `lower-roman`, etc. It is extracted
 * directly from Word stylesheet and adjusted to represent proper values for the CSS `list-style-type` property.
 * If it cannot be adjusted, the `null` value is returned.
 */
function detectListStyle(listLikeItem, stylesString) {
    // Each expression is executed at most once per call, so no global flag (and no `lastIndex` state) is needed.
    const listStyleRegexp = new RegExp(`@list l${listLikeItem.id}:level${listLikeItem.indent}\\s*({[^}]*)`, 'i');
    const listStyleTypeRegex = /mso-level-number-format:([^;]{0,100});/i;
    const listStartIndexRegex = /mso-level-start-at:\s{0,100}([0-9]{0,10})\s{0,100};/i;
    const listStyleMatch = listStyleRegexp.exec(stylesString);

    let listStyleType = 'decimal'; // Decimal is default one.
    let type = 'ol'; // <ol> is default list.
    let startIndex = null;

    if (listStyleMatch && listStyleMatch[1]) {
        const listStyleTypeMatch = listStyleTypeRegex.exec(listStyleMatch[1]);

        if (listStyleTypeMatch && listStyleTypeMatch[1]) {
            listStyleType = listStyleTypeMatch[1].trim();
            type = listStyleType !== 'bullet' && listStyleType !== 'image' ? 'ol' : 'ul';
        }

        // Styles for the numbered lists are always defined in the Word CSS stylesheet.
        // Unordered lists MAY contain a value for the Word CSS definition `mso-level-text` but sometimes
        // this tag is missing. And because of that, we cannot depend on that. We need to predict the list style value
        // based on the list style marker element.
        if (listStyleType === 'bullet') {
            const bulletedStyle = findBulletedListStyle(listLikeItem.element);

            if (bulletedStyle) {
                listStyleType = bulletedStyle;
            }
        } else {
            const listStartIndexMatch = listStartIndexRegex.exec(listStyleMatch[1]);

            if (listStartIndexMatch && listStartIndexMatch[1]) {
                // The captured group holds decimal digits only; the radix is stated explicitly.
                startIndex = parseInt(listStartIndexMatch[1], 10);
            }
        }
    }

    return {
        type,
        startIndex,
        style: mapListStyleDefinition(listStyleType)
    };
}
|
|
175
|
-
/**
 * Guesses the `list-style-type` of a bulleted list from the marker character found in the item.
 *
 * @param element List-like element whose marker is inspected.
 * @returns `'circle'`, `'disc'` or `'square'` for a recognized marker, `null` otherwise
 * (including when no marker node could be found).
 */
function findBulletedListStyle(element) {
    const markerNode = findListMarkerNode(element);

    if (!markerNode) {
        return null;
    }

    switch (markerNode._data) {
        case 'o':
            return 'circle';
        case '·':
            return 'disc';
        // Word returns '§' instead of '■' for the square list style.
        case '§':
            return 'square';
        default:
            return null;
    }
}
|
|
196
|
-
/**
 * Tries to find a text node that represents the marker element (list-style-type).
 *
 * @param element List-like element to search in.
 * @returns The first child of the first non-empty `<span>` when that child is a text node; otherwise
 * that child's own first child (expected to be the marker text node, may be missing). `null` when the
 * element is empty, starts with a text node or has no suitable `<span>`.
 */
function findListMarkerNode(element) {
    const firstChild = element.getChild(0);

    // An element without children cannot hold a marker; bail out instead of dereferencing `undefined`.
    // If the first child is a text node, it is the data for the element.
    // The list-style marker is not present here.
    if (!firstChild || firstChild.is('$text')) {
        return null;
    }

    for (const childNode of element.getChildren()) {
        // The list-style marker will be inside the `<span>` element. Let's ignore all non-span elements.
        // It may happen that the `<a>` element is added as the first child. Most probably, it's an anchor element.
        if (!childNode.is('element', 'span')) {
            continue;
        }

        const textNodeOrElement = childNode.getChild(0);

        if (!textNodeOrElement) {
            continue;
        }

        // If already found the marker element, use it.
        if (textNodeOrElement.is('$text')) {
            return textNodeOrElement;
        }

        return textNodeOrElement.getChild(0);
    }

    /* istanbul ignore next -- @preserve */
    return null;
}
|
|
224
|
-
/**
 * Translates a numbering/bullet format name taken from the Word stylesheet into the matching
 * CSS `list-style-type` value.
 *
 * @param value Format name, e.g. `alpha-upper` or `roman-lower`.
 * @returns The CSS `list-style-type` value, or `null` when the name has no known counterpart.
 */
function mapListStyleDefinition(value) {
    // Any value that begins with this prefix maps to the same CSS keyword.
    if (value.startsWith('arabic-leading-zero')) {
        return 'decimal-leading-zero';
    }

    const cssNamesByWordName = new Map([
        ['alpha-upper', 'upper-alpha'],
        ['alpha-lower', 'lower-alpha'],
        ['roman-upper', 'upper-roman'],
        ['roman-lower', 'lower-roman'],
        // Bullet styles already use the CSS keyword.
        ['circle', 'circle'],
        ['disc', 'disc'],
        ['square', 'square']
    ]);

    return cssNamesByWordName.has(value) ? cssNamesByWordName.get(value) : null;
}
|
|
248
|
-
/**
 * Builds an empty list element and places it directly after the given element, inside that element's parent.
 *
 * @param listStyle List style object which determines the type of newly created list.
 * Usually a result of `detectListStyle()` function.
 * @param element Element after which list is inserted.
 * @param writer Writer used to create, insert and decorate the list.
 * @returns Newly created list element.
 */
function insertNewEmptyList(listStyle, element, writer) {
    const container = element.parent;
    const newList = writer.createElement(listStyle.type);
    const insertionIndex = container.getChildIndex(element) + 1;

    writer.insertChild(insertionIndex, newList, container);

    // We do not support modifying the marker for a particular list item.
    // Set the value for the `list-style-type` property directly to the list container.
    if (listStyle.style) {
        writer.setStyle('list-style-type', listStyle.style, newList);
    }

    // A `start` attribute is only written for start indexes greater than 1.
    if (listStyle.startIndex > 1) {
        writer.setAttribute('start', listStyle.startIndex, newList);
    }

    return newList;
}
|
|
271
|
-
/**
 * Turns the given element into a semantic list item (`<li>`). The element is changed through the
 * writer, so the view structure it belongs to is modified.
 *
 * @param element Element which will be transformed into a list item.
 * @param writer Writer used to strip the bullet, drop the `text-indent` style and rename the element.
 * @returns The element returned by renaming the given one to `li` (the reference to the old element
 * is lost due to renaming).
 */
function transformElementIntoListItem(element, writer) {
    // Drop the marker span first.
    removeBulletElement(element, writer);

    // #12361
    writer.removeStyle('text-indent', element);

    const listItem = writer.rename('li', element);

    return listItem;
}
|
|
284
|
-
/**
 * Extracts list item information from Word specific list-like element style:
 *
 * ```
 * style="mso-list:l1 level1 lfo1"
 * ```
 *
 * where:
 *
 * * `l1` is a list id (however it does not mean this is a continuous list - see #43),
 * * `level1` is a list item indentation level,
 * * `lfo1` is a list insertion order in a document.
 *
 * @param element Element from which style data is extracted.
 * @returns An object with `id` and `order` (digit strings) and `indent` (a number) when all three
 * parts are present in the `mso-list` style; an empty object otherwise.
 */
function getListItemData(element) {
    const listStyle = element.getStyle('mso-list');

    if (!listStyle) {
        return {};
    }

    const idMatch = listStyle.match(/(^|\s{1,100})l(\d+)/i);
    const orderMatch = listStyle.match(/\s{0,100}lfo(\d+)/i);
    const indentMatch = listStyle.match(/\s{0,100}level(\d+)/i);

    // All three parts are required; a partial match yields no data at all.
    if (!idMatch || !orderMatch || !indentMatch) {
        return {};
    }

    return {
        id: idMatch[2],
        order: orderMatch[1],
        // The captured group holds decimal digits only; the radix is stated explicitly.
        indent: parseInt(indentMatch[1], 10)
    };
}
|
|
316
|
-
/**
 * Deletes, from the given element, every `<span>` that holds the list numbering/bullet.
 *
 * @param element Element to clean up.
 * @param writer Writer used to walk the element's content and remove the matched spans.
 */
function removeBulletElement(element, writer) {
    // Matcher for finding `span` elements holding lists numbering/bullets.
    const markerSpanMatcher = new Matcher({
        name: 'span',
        styles: {
            'mso-list': 'Ignore'
        }
    });

    for (const { type, item } of writer.createRangeIn(element)) {
        if (type !== 'elementStart') {
            continue;
        }

        if (markerSpanMatcher.match(item)) {
            writer.remove(item);
        }
    }
}
|
|
334
|
-
/**
 * Decides whether the current item has to start a new list instead of continuing the previous item's list.
 * The decision is based on `item.id` (extracted from `mso-list` style, see #getListItemData) and on the
 * previous sibling of the current item's element.
 *
 * However, it's quite easy to change the `id` attribute for nested lists in Word. It will break the list
 * feature while pasting. That is why the `indent` attribute is checked as well: when the ids differ but the
 * current item is exactly one level deeper than the previous one, it is treated as the beginning of a nested
 * list rather than of a new one.
 * See: https://github.com/ckeditor/ckeditor5/issues/7805.
 *
 * @param previousItem The preceding list-like item, if any.
 * @param currentItem The list-like item being processed.
 * @returns `true` when a new list must be started for `currentItem`.
 */
function isNewListNeeded(previousItem, currentItem) {
    if (!previousItem) {
        return true;
    }

    if (previousItem.id !== currentItem.id) {
        // See: https://github.com/ckeditor/ckeditor5/issues/7805.
        //
        // * List item 1.
        //     - Nested list item 1.
        return currentItem.indent - previousItem.indent !== 1;
    }

    const precedingNode = currentItem.element.previousSibling;

    // Even with the same id the list does not have to be continuous (#43).
    return !precedingNode || !isList(precedingNode);
}
|
|
364
|
-
/**
 * Tells whether the given node is a list container, i.e. an `<ol>` or `<ul>` element.
 */
function isList(element) {
    return ['ol', 'ul'].some(listName => element.is('element', listName));
}
|
|
367
|
-
/**
 * Computes how much deeper (positive) or shallower (negative) the current item is than the previous one,
 * based on the `indent` value extracted from the `mso-list` style (see #getListItemData).
 * Without a previous item the comparison is made against indentation level 1.
 *
 * @param previousItem The preceding list-like item, or a falsy value when there is none.
 * @param currentItem The list-like item being processed.
 * @returns The indentation difference.
 */
function getIndentationDifference(previousItem, currentItem) {
    const referenceIndent = previousItem ? previousItem.indent : 1;

    return currentItem.indent - referenceIndent;
}
|
|
374
|
-
/**
 * Walks up from a list element and returns the ancestor list (ul/ol) that lies the given number of
 * list levels above it.
 *
 * @param listElement List element from which to start looking for a parent list.
 * @param indentationDifference Indentation difference between lists.
 * @returns Found list element with indentation level lower by a given value, or `null` when the
 * ancestors run out before the requested level is reached.
 */
function findParentListAtLevel(listElement, indentationDifference) {
    let listsPassed = 0;

    // Ancestors are requested closest-first.
    for (const ancestor of listElement.getAncestors({ parentFirst: true })) {
        const isListContainer = ancestor.is('element', 'ul') || ancestor.is('element', 'ol');

        if (isListContainer) {
            listsPassed += 1;
        }

        if (listsPassed === indentationDifference) {
            return ancestor;
        }
    }

    return null;
}
|
|
1
|
+
/**
|
|
2
|
+
* @license Copyright (c) 2003-2023, CKSource Holding sp. z o.o. All rights reserved.
|
|
3
|
+
* For licensing, see LICENSE.md or https://ckeditor.com/legal/ckeditor-oss-license
|
|
4
|
+
*/
|
|
5
|
+
/**
|
|
6
|
+
* @module paste-from-office/filters/list
|
|
7
|
+
*/
|
|
8
|
+
import { Matcher, UpcastWriter } from 'ckeditor5/src/engine';
|
|
9
|
+
/**
 * Converts Word's flat, list-like blocks into nested semantic `<ol>`/`<ul>` structures.
 *
 * Word does not emit real list markup; list items are ordinary blocks carrying an `mso-list` style:
 *
 * ```xml
 * <p class=MsoListParagraphCxSpFirst style='mso-list:l1 level1 lfo1'>...</p> // Paragraph based list.
 * <h1 style='mso-list:l0 level1 lfo1'>...</h1> // Heading 1 based list.
 * ```
 *
 * The view structure is modified through an `UpcastWriter`; the function returns nothing.
 *
 * @param documentFragment The view structure to be transformed.
 * @param stylesString Styles from which list-like elements styling will be extracted.
 */
export function transformListItemLikeElementsIntoLists(documentFragment, stylesString) {
    if (!documentFragment.childCount) {
        return;
    }

    const writer = new UpcastWriter(documentFragment.document);
    const items = findAllItemLikeElements(documentFragment, writer);

    if (!items.length) {
        return;
    }

    // The list that receives the next item, and the nesting depth that list sits at.
    let activeList = null;
    let activeDepth = 1;

    for (let index = 0; index < items.length; index++) {
        const item = items[index];
        const startsNewList = isNewListNeeded(items[index - 1], item);
        // When a new list starts, the previous item is ignored for the indentation comparison.
        const indentDelta = getIndentationDifference(startsNewList ? null : items[index - 1], item);

        if (startsNewList) {
            activeList = null;
            activeDepth = 1;
        }

        if (!activeList || indentDelta !== 0) {
            const listStyle = detectListStyle(item, stylesString);

            if (!activeList) {
                activeList = insertNewEmptyList(listStyle, item.element, writer);
            } else if (item.indent > activeDepth) {
                // Deeper item: open a nested list right after the last child of the last list item.
                const lastItem = activeList.getChild(activeList.childCount - 1);
                const lastItemChild = lastItem.getChild(lastItem.childCount - 1);

                activeList = insertNewEmptyList(listStyle, lastItemChild, writer);
                activeDepth += 1;
            } else if (item.indent < activeDepth) {
                // Shallower item: climb back up by the number of levels that were closed.
                activeList = findParentListAtLevel(activeList, activeDepth - item.indent);
                activeDepth = item.indent;
            }

            // Make sure the list that receives the item has the element name the detected style asks for.
            if (item.indent <= activeDepth && !activeList.is('element', listStyle.type)) {
                activeList = writer.rename(listStyle.type, activeList);
            }
        }

        writer.appendChild(transformElementIntoListItem(item.element, writer), activeList);
    }
}
|
|
67
|
+
/**
 * Strips the paragraph that wraps the content of a list item.
 *
 * Every value yielded by `writer.createRangeIn( documentFragment )` is inspected; when its item is an
 * `<li>` whose first child is a `<p>`, that paragraph is unwrapped through the writer.
 *
 * @param documentFragment The view structure to scan.
 * @param writer Writer used both to create the range and to unwrap the paragraphs.
 */
export function unwrapParagraphInListItem(documentFragment, writer) {
    for (const { item } of writer.createRangeIn(documentFragment)) {
        if (!item.is('element', 'li')) {
            continue;
        }

        // Google Docs allows for single paragraph inside LI.
        const leadingChild = item.getChild(0);

        if (leadingChild && leadingChild.is('element', 'p')) {
            writer.unwrapElement(leadingChild);
        }
    }
}
|
|
82
|
+
/**
 * Finds all list-like elements in a given document fragment.
 *
 * An element qualifies when its name is exactly `p` or `h` followed by digits and it carries
 * an `mso-list` style (with any value).
 *
 * @param documentFragment Document fragment in which to look for list-like nodes.
 * @param writer Writer used to create the range spanning the fragment.
 * @returns Array of found list-like items. Each item is an object containing:
 *
 * * element - The matched list-like element.
 * * id - The `id` value returned by `getListItemData()` for the element.
 * * order - The `order` value returned by `getListItemData()` for the element.
 * * indent - The `indent` value returned by `getListItemData()` for the element.
 */
function findAllItemLikeElements(documentFragment, writer) {
    const range = writer.createRangeIn(documentFragment);

    // Matcher for finding list-like elements. The alternation is grouped so that both anchors apply
    // to each branch; the ungrouped form (`^p|h\d+$`) also accepts any name starting with `p`, e.g. `pre`.
    const itemLikeElementsMatcher = new Matcher({
        name: /^(?:p|h\d+)$/,
        styles: {
            'mso-list': /.*/
        }
    });
    const itemLikeElements = [];

    for (const value of range) {
        if (value.type === 'elementStart' && itemLikeElementsMatcher.match(value.item)) {
            const itemData = getListItemData(value.item);

            itemLikeElements.push({
                element: value.item,
                id: itemData.id,
                order: itemData.order,
                indent: itemData.indent
            });
        }
    }

    return itemLikeElements;
}
|
|
111
|
+
/**
 * Extracts list item style from the provided CSS.
 *
 * List item style is extracted from the CSS stylesheet. Each list with its specific style attribute
 * value (`mso-list:l1 level1 lfo1`) has its dedicated properties in a CSS stylesheet defined with a selector like:
 *
 * ```css
 * @list l1:level1 { ... }
 * ```
 *
 * It contains `mso-level-number-format` property which defines list numbering/bullet style. If this property
 * is not defined it means default `decimal` numbering.
 *
 * Here CSS string representation is used as `mso-level-number-format` property is an invalid CSS property
 * and will be removed during CSS parsing.
 *
 * @param listLikeItem List-like item for which list style will be searched for. Usually
 * a result of `findAllItemLikeElements()` function.
 * @param stylesString CSS stylesheet.
 * @returns An object with properties:
 *
 * * type - List type, could be `ul` or `ol`.
 * * startIndex - List start index, valid only for ordered lists. `null` when no start index was found.
 * * style - List style, for example: `decimal`, `lower-roman`, etc. It is extracted
 * directly from Word stylesheet and adjusted to represent proper values for the CSS `list-style-type` property.
 * If it cannot be adjusted, the `null` value is returned.
 */
function detectListStyle(listLikeItem, stylesString) {
    // Each expression is executed at most once per call, so no global flag (and no `lastIndex` state) is needed.
    const listStyleRegexp = new RegExp(`@list l${listLikeItem.id}:level${listLikeItem.indent}\\s*({[^}]*)`, 'i');
    const listStyleTypeRegex = /mso-level-number-format:([^;]{0,100});/i;
    const listStartIndexRegex = /mso-level-start-at:\s{0,100}([0-9]{0,10})\s{0,100};/i;
    const listStyleMatch = listStyleRegexp.exec(stylesString);

    let listStyleType = 'decimal'; // Decimal is default one.
    let type = 'ol'; // <ol> is default list.
    let startIndex = null;

    if (listStyleMatch && listStyleMatch[1]) {
        const listStyleTypeMatch = listStyleTypeRegex.exec(listStyleMatch[1]);

        if (listStyleTypeMatch && listStyleTypeMatch[1]) {
            listStyleType = listStyleTypeMatch[1].trim();
            type = listStyleType !== 'bullet' && listStyleType !== 'image' ? 'ol' : 'ul';
        }

        // Styles for the numbered lists are always defined in the Word CSS stylesheet.
        // Unordered lists MAY contain a value for the Word CSS definition `mso-level-text` but sometimes
        // this tag is missing. And because of that, we cannot depend on that. We need to predict the list style value
        // based on the list style marker element.
        if (listStyleType === 'bullet') {
            const bulletedStyle = findBulletedListStyle(listLikeItem.element);

            if (bulletedStyle) {
                listStyleType = bulletedStyle;
            }
        } else {
            const listStartIndexMatch = listStartIndexRegex.exec(listStyleMatch[1]);

            if (listStartIndexMatch && listStartIndexMatch[1]) {
                // The captured group holds decimal digits only; the radix is stated explicitly.
                startIndex = parseInt(listStartIndexMatch[1], 10);
            }
        }
    }

    return {
        type,
        startIndex,
        style: mapListStyleDefinition(listStyleType)
    };
}
|
|
175
|
+
/**
 * Guesses the `list-style-type` of a bulleted list from the marker character found in the item.
 *
 * @param element List-like element whose marker is inspected.
 * @returns `'circle'`, `'disc'` or `'square'` for a recognized marker, `null` otherwise
 * (including when no marker node could be found).
 */
function findBulletedListStyle(element) {
    const markerNode = findListMarkerNode(element);

    if (!markerNode) {
        return null;
    }

    switch (markerNode._data) {
        case 'o':
            return 'circle';
        case '·':
            return 'disc';
        // Word returns '§' instead of '■' for the square list style.
        case '§':
            return 'square';
        default:
            return null;
    }
}
|
|
196
|
+
/**
 * Tries to find a text node that represents the marker element (list-style-type).
 *
 * @param element List-like element to search in.
 * @returns The first child of the first non-empty `<span>` when that child is a text node; otherwise
 * that child's own first child (expected to be the marker text node, may be missing). `null` when the
 * element is empty, starts with a text node or has no suitable `<span>`.
 */
function findListMarkerNode(element) {
    const firstChild = element.getChild(0);

    // An element without children cannot hold a marker; bail out instead of dereferencing `undefined`.
    // If the first child is a text node, it is the data for the element.
    // The list-style marker is not present here.
    if (!firstChild || firstChild.is('$text')) {
        return null;
    }

    for (const childNode of element.getChildren()) {
        // The list-style marker will be inside the `<span>` element. Let's ignore all non-span elements.
        // It may happen that the `<a>` element is added as the first child. Most probably, it's an anchor element.
        if (!childNode.is('element', 'span')) {
            continue;
        }

        const textNodeOrElement = childNode.getChild(0);

        if (!textNodeOrElement) {
            continue;
        }

        // If already found the marker element, use it.
        if (textNodeOrElement.is('$text')) {
            return textNodeOrElement;
        }

        return textNodeOrElement.getChild(0);
    }

    /* istanbul ignore next -- @preserve */
    return null;
}
|
|
224
|
+
/**
 * Translates a numbering/bullet format name taken from the Word stylesheet into the matching
 * CSS `list-style-type` value.
 *
 * @param value Format name, e.g. `alpha-upper` or `roman-lower`.
 * @returns The CSS `list-style-type` value, or `null` when the name has no known counterpart.
 */
function mapListStyleDefinition(value) {
    // Any value that begins with this prefix maps to the same CSS keyword.
    if (value.startsWith('arabic-leading-zero')) {
        return 'decimal-leading-zero';
    }

    const cssNamesByWordName = new Map([
        ['alpha-upper', 'upper-alpha'],
        ['alpha-lower', 'lower-alpha'],
        ['roman-upper', 'upper-roman'],
        ['roman-lower', 'lower-roman'],
        // Bullet styles already use the CSS keyword.
        ['circle', 'circle'],
        ['disc', 'disc'],
        ['square', 'square']
    ]);

    return cssNamesByWordName.has(value) ? cssNamesByWordName.get(value) : null;
}
|
|
248
|
+
/**
 * Builds an empty list element and places it directly after the given element, inside that element's parent.
 *
 * @param listStyle List style object which determines the type of newly created list.
 * Usually a result of `detectListStyle()` function.
 * @param element Element after which list is inserted.
 * @param writer Writer used to create, insert and decorate the list.
 * @returns Newly created list element.
 */
function insertNewEmptyList(listStyle, element, writer) {
    const container = element.parent;
    const newList = writer.createElement(listStyle.type);
    const insertionIndex = container.getChildIndex(element) + 1;

    writer.insertChild(insertionIndex, newList, container);

    // We do not support modifying the marker for a particular list item.
    // Set the value for the `list-style-type` property directly to the list container.
    if (listStyle.style) {
        writer.setStyle('list-style-type', listStyle.style, newList);
    }

    // A `start` attribute is only written for start indexes greater than 1.
    if (listStyle.startIndex > 1) {
        writer.setAttribute('start', listStyle.startIndex, newList);
    }

    return newList;
}
|
|
271
|
+
/**
 * Turns the given element into a semantic list item (`<li>`). The operation works directly on the
 * provided view element, so the view structure this element belongs to is modified.
 *
 * @param element Element which will be transformed into a list item.
 * @param writer Writer used to modify the view.
 * @returns The element produced by the rename. It takes the place of the given element
 * (the reference to the old element is lost due to renaming).
 */
function transformElementIntoListItem(element, writer) {
    // Drop the span that carries the Word numbering/bullet marker.
    removeBulletElement(element, writer);
    // See #12361.
    writer.removeStyle('text-indent', element);
    const listItem = writer.rename('li', element);
    return listItem;
}
|
|
284
|
+
/**
 * Reads list item information out of the Word-specific `mso-list` style of a list-like element:
 *
 * ```
 * `style="mso-list:l1 level1 lfo1"`
 * ```
 *
 * where:
 *
 * ```
 * * `l1` is a list id (however it does not mean this is a continuous list - see #43),
 * * `level1` is a list item indentation level,
 * * `lfo1` is a list insertion order in a document.
 * ```
 *
 * @param element Element from which style data is extracted.
 * @returns An object with `id` (string), `order` (string) and `indent` (number) when all three parts
 * are present in the style; an empty object otherwise.
 */
function getListItemData(element) {
    const msoList = element.getStyle('mso-list');
    if (!msoList) {
        return {};
    }
    const idParts = msoList.match(/(^|\s{1,100})l(\d+)/i);
    const orderParts = msoList.match(/\s{0,100}lfo(\d+)/i);
    const levelParts = msoList.match(/\s{0,100}level(\d+)/i);
    // All three parts are required; a partial match yields no data at all.
    if (!idParts || !orderParts || !levelParts) {
        return {};
    }
    return {
        // Group 1 of the id pattern is the leading boundary, the digits are in group 2.
        id: idParts[2],
        order: orderParts[1],
        indent: Number.parseInt(levelParts[1], 10)
    };
}
|
|
316
|
+
/**
 * Removes every `<span>` styled with `mso-list:Ignore` (the numbering/bullet marker) found inside the given element.
 *
 * @param element Element whose content is scanned for marker spans.
 * @param writer Writer used to create the scanned range and to remove the matched spans.
 */
function removeBulletElement(element, writer) {
    // Matches the `span` elements that hold the list numbering/bullets.
    const markerSpanMatcher = new Matcher({
        name: 'span',
        styles: {
            'mso-list': 'Ignore'
        }
    });
    const content = writer.createRangeIn(element);
    for (const step of content) {
        // Only the opening boundary of an element is of interest.
        if (step.type !== 'elementStart') {
            continue;
        }
        const candidate = step.item;
        if (markerSpanMatcher.match(candidate)) {
            writer.remove(candidate);
        }
    }
}
|
|
334
|
+
/**
 * Decides whether the current item has to start a new list instead of continuing the list of the previous item.
 * The decision is based on `item.id` (extracted from the `mso-list` style, see #getListItemData) and on the
 * previous sibling of the current item's element.
 *
 * Changing the `id` for nested lists is easy to do in Word and would break pasting of such lists, so the
 * `indent` values are compared as well: when the ids differ but the current item is indented exactly one level
 * deeper than the previous one, it is treated as the beginning of a nested list and no new list is requested.
 * See: https://github.com/ckeditor/ckeditor5/issues/7805.
 *
 * @param previousItem Data of the previously processed list item (falsy when there is none).
 * @param currentItem Data of the list item being processed.
 * @returns `true` when a new list should be created, `false` otherwise.
 */
function isNewListNeeded(previousItem, currentItem) {
    if (!previousItem) {
        return true;
    }
    const hasSameId = previousItem.id === currentItem.id;
    if (!hasSameId) {
        // See: https://github.com/ckeditor/ckeditor5/issues/7805.
        //
        // * List item 1.
        //     - Nested list item 1.
        //
        // Exactly one level deeper means a nested list; any other change means a new list.
        return currentItem.indent - previousItem.indent !== 1;
    }
    // Even with the same id the list does not have to be continuous (#43):
    // the item continues a list only when a list element directly precedes it.
    const precedingNode = currentItem.element.previousSibling;
    return !precedingNode || !isList(precedingNode);
}
|
|
364
|
+
/**
 * Checks whether the given node is a list container, i.e. an `<ol>` or `<ul>` element.
 */
function isList(element) {
    const listNames = ['ol', 'ul'];
    return listNames.some(name => element.is('element', name));
}
|
|
367
|
+
/**
 * Computes how much the indentation changes between two list items (based on the `indent` value
 * extracted from the `mso-list` style, see #getListItemData).
 *
 * @param previousItem Data of the previous list item (falsy when there is none).
 * @param currentItem Data of the current list item.
 * @returns `currentItem.indent` minus the previous item's indent; when there is no previous item,
 * indentation level 1 is used as the baseline.
 */
function getIndentationDifference(previousItem, currentItem) {
    let baseline = 1;
    if (previousItem) {
        baseline = previousItem.indent;
    }
    return currentItem.indent - baseline;
}
|
|
374
|
+
/**
 * Walks up from the given list element, counting the list containers (`ul`/`ol`) met on the way, and returns
 * the ancestor at which that count reaches the requested indentation difference.
 *
 * @param listElement List element from which to start looking for a parent list.
 * @param indentationDifference Indentation difference between lists.
 * @returns The ancestor at which the count matched, or `null` when no ancestor satisfies it.
 */
function findParentListAtLevel(listElement, indentationDifference) {
    let listsPassed = 0;
    // Ancestors are visited starting from the closest parent.
    for (const node of listElement.getAncestors({ parentFirst: true })) {
        const isListContainer = node.is('element', 'ul') || node.is('element', 'ol');
        if (isListContainer) {
            listsPassed += 1;
        }
        // The comparison runs for every ancestor, not only for list containers.
        if (listsPassed === indentationDifference) {
            return node;
        }
    }
    return null;
}
|