@ckeditor/ckeditor5-paste-from-office 35.3.2 → 36.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE.md +1 -1
- package/build/paste-from-office.js +2 -2
- package/package.json +28 -24
- package/src/filters/br.js +51 -63
- package/src/filters/image.js +216 -197
- package/src/filters/list.js +359 -429
- package/src/filters/parse.js +76 -106
- package/src/filters/removeboldwrapper.js +10 -17
- package/src/filters/space.js +37 -39
- package/src/index.js +1 -3
- package/src/normalizer.js +5 -0
- package/src/normalizers/googledocsnormalizer.js +26 -41
- package/src/normalizers/mswordnormalizer.js +24 -38
- package/src/pastefromoffice.js +41 -62
- package/build/paste-from-office.js.map +0 -1
- package/src/normalizer.jsdoc +0 -34
package/src/filters/image.js
CHANGED
|
@@ -1,222 +1,241 @@
|
|
|
1
1
|
/**
|
|
2
|
-
* @license Copyright (c) 2003-
|
|
2
|
+
* @license Copyright (c) 2003-2023, CKSource Holding sp. z o.o. All rights reserved.
|
|
3
3
|
* For licensing, see LICENSE.md or https://ckeditor.com/legal/ckeditor-oss-license
|
|
4
4
|
*/
|
|
5
|
-
|
|
6
5
|
/**
|
|
7
6
|
* @module paste-from-office/filters/image
|
|
8
7
|
*/
|
|
9
|
-
|
|
10
8
|
/* globals btoa */
|
|
11
|
-
|
|
12
9
|
import { Matcher, UpcastWriter } from 'ckeditor5/src/engine';
|
|
13
|
-
|
|
14
10
|
/**
 * Rewrites the `src` attribute of every regular `<img>` element (Word shapes excluded)
 * in the given fragment to an inline base64 representation taken from the RTF data.
 *
 * The fragment is modified in place. The steps run in a fixed order: shape-only images are
 * removed, missing images are inserted, shape elements are dropped, and only then are the
 * remaining `file://` images rewritten.
 *
 * @param documentFragment Document fragment whose images should be transformed.
 * @param rtfData The RTF data from which the image representations are taken.
 */
export function replaceImagesSourceWithBase64(documentFragment, rtfData) {
    const isEmptyFragment = !documentFragment.childCount;

    if (isEmptyFragment) {
        return;
    }

    const writer = new UpcastWriter(documentFragment.document);
    const wordShapeIds = findAllShapesIds(documentFragment, writer);

    removeAllImgElementsRepresentingShapes(wordShapeIds, documentFragment, writer);
    insertMissingImgs(wordShapeIds, documentFragment, writer);
    removeAllShapeElements(documentFragment, writer);

    const localImages = findAllImageElementsWithLocalSource(documentFragment, writer);

    // The RTF data is parsed only when there is at least one image to rewrite.
    if (localImages.length) {
        const hexSources = extractImageDataFromRtf(rtfData);

        replaceImagesFileSourceWithInlineRepresentation(localImages, hexSources, writer);
    }
}
|
|
38
|
-
|
|
39
31
|
/**
 * Converts given HEX string to base64 representation.
 *
 * Only complete pairs of hexadecimal digits are converted. An empty string, or a string
 * without any complete pair, yields an empty string instead of throwing. A trailing
 * unpaired digit is ignored.
 *
 * @internal
 * @param hexString The HEX string to be converted.
 * @returns Base64 representation of a given HEX string.
 */
export function _convertHexToBase64(hexString) {
    // `String#match()` with the `g` flag returns `null` (not an empty array) when nothing matches.
    const bytePairs = hexString.match(/[\da-fA-F]{2}/g) || [];
    const binaryString = bytePairs
        .map(pair => String.fromCharCode(parseInt(pair, 16)))
        .join('');

    return btoa(binaryString);
}
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
if ( shapeElementsMatcher.match( el ) && el.getAttribute( 'o:gfxdata' ) && prevSiblingName !== 'v:shapetype' ) {
|
|
78
|
-
shapesIds.push( value.item.getAttribute( 'id' ) );
|
|
79
|
-
}
|
|
80
|
-
}
|
|
81
|
-
|
|
82
|
-
return shapesIds;
|
|
43
|
+
/**
 * Collects the ids of the shape elements (`<v:*>...</v:*>`) that represent Word shapes.
 * Shapes can represent images (canvas) or Word shapes (which do not have an RTF or Blob representation).
 *
 * @param documentFragment Document fragment from which to extract shape ids.
 * @param writer Writer used to create the range that is walked over the fragment.
 * @returns Array of shape ids.
 */
function findAllShapesIds(documentFragment, writer) {
    const range = writer.createRangeIn(documentFragment);
    const shapeMatcher = new Matcher({ name: /v:(.+)/ });
    const wordShapeIds = [];

    for (const { type, item } of range) {
        if (type == 'elementStart') {
            const sibling = item.previousSibling;
            const siblingName = sibling && sibling.is('element') ? sibling.name : null;

            // A shape element is treated as a Word shape when it has the `o:gfxdata` attribute
            // and its previous sibling is not a `<v:shapetype>` element.
            const isWordShape = shapeMatcher.match(item) &&
                item.getAttribute('o:gfxdata') &&
                siblingName !== 'v:shapetype';

            if (isWordShape) {
                wordShapeIds.push(item.getAttribute('id'));
            }
        }
    }

    return wordShapeIds;
}
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
function removeAllImgElementsRepresentingShapes(
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
for ( const img of imgs ) {
|
|
114
|
-
writer.remove( img );
|
|
115
|
-
}
|
|
70
|
+
/**
 * Removes every `<img>` element that represents a Word shape rather than a regular image.
 *
 * @param shapesIds Shape ids which will be checked against the `v:shapes` attribute of `<img>` elements.
 * @param documentFragment Document fragment from which to remove `<img>` elements.
 * @param writer Writer used to walk the fragment and to remove the elements.
 */
function removeAllImgElementsRepresentingShapes(shapesIds, documentFragment, writer) {
    const range = writer.createRangeIn(documentFragment);
    const imgMatcher = new Matcher({ name: 'img' });
    const imgsToRemove = [];

    for (const { item } of range) {
        if (!item.is('element') || !imgMatcher.match(item)) {
            continue;
        }

        const shapesAttribute = item.getAttribute('v:shapes');
        const referencedShapes = shapesAttribute ? shapesAttribute.split(' ') : [];
        const referencesOnlyWordShapes = referencedShapes.length &&
            referencedShapes.every(shapeId => shapesIds.includes(shapeId));

        // Shapes may also have an empty source when the content is pasted in some browsers (Safari),
        // so an `<img>` without `src` is removed as well.
        if (referencesOnlyWordShapes || !item.getAttribute('src')) {
            imgsToRemove.push(item);
        }
    }

    // Removal happens only after the walk is finished, so the range is not modified while iterating.
    imgsToRemove.forEach(img => {
        writer.remove(img);
    });
}
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
function removeAllShapeElements(
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
|
|
99
|
+
/**
 * Strips all shape elements (`<v:*>...</v:*>`) from the fragment so they do not pollute the output structure.
 *
 * @param documentFragment Document fragment from which to remove shape elements.
 * @param writer Writer used to walk the fragment and to remove the elements.
 */
function removeAllShapeElements(documentFragment, writer) {
    const range = writer.createRangeIn(documentFragment);
    const shapeMatcher = new Matcher({ name: /v:(.+)/ });

    // Collect first, remove afterwards: the range is fully walked before the tree is modified.
    const shapeElements = Array.from(range)
        .filter(value => value.type == 'elementStart' && shapeMatcher.match(value.item))
        .map(value => value.item);

    shapeElements.forEach(shapeElement => {
        writer.remove(shapeElement);
    });
}
|
|
119
|
+
/**
 * Inserts an `<img>` element right after every `<v:shape>` that is not a known Word shape
 * and has no `<img>` referring to it (via `v:shapes`) anywhere under its parent.
 *
 * @param shapeIds Ids of Word shapes; `<v:shape>` elements with these ids are skipped.
 * @param documentFragment Document fragment in which to look for `<v:shape>` elements.
 * @param writer Writer used to walk the fragment and to create and insert the `<img>` elements.
 */
function insertMissingImgs(shapeIds, documentFragment, writer) {
    const range = writer.createRangeIn(documentFragment);
    const shapesWithoutImg = [];

    for (const { type, item } of range) {
        if (type != 'elementStart' || !item.is('element', 'v:shape')) {
            continue;
        }

        const shapeId = item.getAttribute('id');
        const isWordShape = shapeIds.includes(shapeId);

        if (!isWordShape && !hasImgForShape(item.parent.getChildren(), shapeId)) {
            shapesWithoutImg.push(item);
        }
    }

    // Insertion happens only after the walk is finished.
    for (const shape of shapesWithoutImg) {
        const imgAttributes = { src: firstChildSrc(shape) };

        if (shape.hasAttribute('alt')) {
            imgAttributes.alt = shape.getAttribute('alt');
        }

        const img = writer.createElement('img', imgAttributes);

        writer.insertChild(shape.index + 1, img, shape.parent);
    }

    // Recursively checks whether any of the nodes (or their descendants) is an `<img>`
    // whose `v:shapes` attribute equals the given shape id.
    function hasImgForShape(nodes, shapeId) {
        for (const node of nodes) {
            /* istanbul ignore else */
            if (node.is('element')) {
                const isMatchingImg = node.name == 'img' && node.getAttribute('v:shapes') == shapeId;

                if (isMatchingImg || hasImgForShape(node.getChildren(), shapeId)) {
                    return true;
                }
            }
        }

        return false;
    }

    // Returns the `src` of the first direct child element of the shape that has one
    // (`undefined` when there is no such child).
    function firstChildSrc(shape) {
        for (const child of shape.getChildren()) {
            /* istanbul ignore else */
            if (child.is('element') && child.getAttribute('src')) {
                return child.getAttribute('src');
            }
        }
    }
}
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
imgs.push( value.item );
|
|
162
|
-
}
|
|
163
|
-
}
|
|
164
|
-
}
|
|
165
|
-
|
|
166
|
-
return imgs;
|
|
169
|
+
/**
 * Finds all `<img>` elements in a given document fragment which have source pointing to local `file://` resource.
 *
 * An `<img>` element without a `src` attribute is skipped instead of causing an error.
 *
 * @param documentFragment Document fragment in which to look for `<img>` elements.
 * @param writer Writer used to create the range that is walked over the fragment.
 * @returns Array of the found `<img>` elements, in the order they were encountered.
 */
function findAllImageElementsWithLocalSource(documentFragment, writer) {
    const range = writer.createRangeIn(documentFragment);
    const imageElementsMatcher = new Matcher({
        name: 'img'
    });
    const imgs = [];

    for (const value of range) {
        if (!value.item.is('element') || !imageElementsMatcher.match(value.item)) {
            continue;
        }

        // `getAttribute()` yields no string when the attribute is missing,
        // so guard before calling `startsWith()`.
        const src = value.item.getAttribute('src');

        if (src && src.startsWith('file://')) {
            imgs.push(value.item);
        }
    }

    return imgs;
}
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
|
|
172
|
-
|
|
173
|
-
|
|
174
|
-
|
|
175
|
-
|
|
176
|
-
|
|
177
|
-
|
|
178
|
-
|
|
179
|
-
|
|
180
|
-
|
|
181
|
-
|
|
182
|
-
|
|
183
|
-
|
|
184
|
-
|
|
185
|
-
|
|
186
|
-
|
|
187
|
-
|
|
188
|
-
|
|
189
|
-
|
|
190
|
-
|
|
191
|
-
|
|
192
|
-
|
|
193
|
-
|
|
194
|
-
|
|
195
|
-
|
|
196
|
-
|
|
197
|
-
|
|
198
|
-
|
|
199
|
-
|
|
200
|
-
|
|
201
|
-
|
|
202
|
-
|
|
203
|
-
}
|
|
204
|
-
|
|
205
|
-
return result;
|
|
190
|
+
/**
 * Extracts all images HEX representations from a given RTF data.
 *
 * Only pictures whose header contains the `\pngblip` or `\jpegblip` keyword are returned;
 * pictures of any other kind are skipped.
 *
 * @param rtfData The RTF data from which to extract images HEX representation.
 * @returns Array of found HEX representations. Each array item is an object containing:
 *
 * * hex Image representation in HEX format.
 * * type Type of image, `image/png` or `image/jpeg`.
 */
function extractImageDataFromRtf(rtfData) {
    if (!rtfData) {
        return [];
    }

    const regexPictureHeader = /{\\pict[\s\S]+?\\bliptag-?\d+(\\blipupi-?\d+)?({\\\*\\blipuid\s?[\da-fA-F]+)?[\s}]*?/;
    const regexPicture = new RegExp('(?:(' + regexPictureHeader.source + '))([\\da-fA-F\\s]+)\\}', 'g');
    const result = [];
    let match;

    // Walk the matches one by one so the capture groups stay available:
    // group 1 is the whole picture header, the last group is the HEX payload.
    while ((match = regexPicture.exec(rtfData)) !== null) {
        const header = match[1];
        const payload = match[match.length - 1];
        let imageType = null;

        // The payload consists of HEX digits and whitespace only, so the format keyword
        // can appear only in the header.
        if (header.includes('\\pngblip')) {
            imageType = 'image/png';
        }
        else if (header.includes('\\jpegblip')) {
            imageType = 'image/jpeg';
        }

        if (imageType) {
            result.push({
                // Drop the whitespace (e.g. line breaks) that RTF puts inside the HEX stream.
                hex: payload.replace(/[^\da-fA-F]/g, ''),
                type: imageType
            });
        }
    }

    return result;
}
|
|
207
|
-
|
|
208
|
-
|
|
209
|
-
|
|
210
|
-
|
|
211
|
-
|
|
212
|
-
|
|
213
|
-
|
|
214
|
-
function replaceImagesFileSourceWithInlineRepresentation(
|
|
215
|
-
|
|
216
|
-
|
|
217
|
-
|
|
218
|
-
|
|
219
|
-
|
|
220
|
-
|
|
221
|
-
|
|
226
|
+
/**
 * Sets the `src` attribute of each given image to a `data:` URI carrying the base64 form
 * of the HEX source found at the same position.
 *
 * @param imageElements Array of image elements which will have their source replaced.
 * @param imagesHexSources Array of images hex sources (usually the result of `extractImageDataFromRtf()` function).
 * The array should be the same length as `imageElements` parameter; when the lengths differ, nothing is changed.
 * @param writer Writer used to set the attribute on the image elements.
 */
function replaceImagesFileSourceWithInlineRepresentation(imageElements, imagesHexSources, writer) {
    // Elements and HEX sources are paired purely by position, which is reliable only when
    // both lists have the same number of items.
    if (imageElements.length !== imagesHexSources.length) {
        return;
    }

    imageElements.forEach((imageElement, index) => {
        const source = imagesHexSources[index];
        const base64 = _convertHexToBase64(source.hex);

        writer.setAttribute('src', `data:${source.type};base64,${base64}`, imageElement);
    });
}
|