@ckeditor/ckeditor5-paste-from-office 35.3.2 → 36.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,222 +1,241 @@
1
1
  /**
2
- * @license Copyright (c) 2003-2022, CKSource Holding sp. z o.o. All rights reserved.
2
+ * @license Copyright (c) 2003-2023, CKSource Holding sp. z o.o. All rights reserved.
3
3
  * For licensing, see LICENSE.md or https://ckeditor.com/legal/ckeditor-oss-license
4
4
  */
5
-
6
5
  /**
7
6
  * @module paste-from-office/filters/image
8
7
  */
9
-
10
8
  /* globals btoa */
11
-
12
9
  import { Matcher, UpcastWriter } from 'ckeditor5/src/engine';
13
-
14
10
  /**
15
11
  * Replaces source attribute of all `<img>` elements representing regular
16
12
  * images (not the Word shapes) with inlined base64 image representation extracted from RTF or Blob data.
17
13
  *
18
- * @param {module:engine/view/documentfragment~DocumentFragment} documentFragment Document fragment on which transform images.
19
- * @param {String} rtfData The RTF data from which images representation will be used.
14
+ * @param documentFragment Document fragment in which to transform images.
15
+ * @param rtfData The RTF data from which the image representations will be taken.
20
16
  */
21
- export function replaceImagesSourceWithBase64( documentFragment, rtfData ) {
22
- if ( !documentFragment.childCount ) {
23
- return;
24
- }
25
-
26
- const upcastWriter = new UpcastWriter();
27
- const shapesIds = findAllShapesIds( documentFragment, upcastWriter );
28
-
29
- removeAllImgElementsRepresentingShapes( shapesIds, documentFragment, upcastWriter );
30
- removeAllShapeElements( documentFragment, upcastWriter );
31
-
32
- const images = findAllImageElementsWithLocalSource( documentFragment, upcastWriter );
33
-
34
- if ( images.length ) {
35
- replaceImagesFileSourceWithInlineRepresentation( images, extractImageDataFromRtf( rtfData ), upcastWriter );
36
- }
17
+ export function replaceImagesSourceWithBase64(documentFragment, rtfData) {
18
+ if (!documentFragment.childCount) {
19
+ return;
20
+ }
21
+ const upcastWriter = new UpcastWriter(documentFragment.document);
22
+ const shapesIds = findAllShapesIds(documentFragment, upcastWriter);
23
+ removeAllImgElementsRepresentingShapes(shapesIds, documentFragment, upcastWriter);
24
+ insertMissingImgs(shapesIds, documentFragment, upcastWriter);
25
+ removeAllShapeElements(documentFragment, upcastWriter);
26
+ const images = findAllImageElementsWithLocalSource(documentFragment, upcastWriter);
27
+ if (images.length) {
28
+ replaceImagesFileSourceWithInlineRepresentation(images, extractImageDataFromRtf(rtfData), upcastWriter);
29
+ }
37
30
  }
38
-
39
31
  /**
40
32
  * Converts given HEX string to base64 representation.
41
33
  *
42
- * @protected
43
- * @param {String} hexString The HEX string to be converted.
44
- * @returns {String} Base64 representation of a given HEX string.
34
+ * @internal
35
+ * @param hexString The HEX string to be converted.
36
+ * @returns Base64 representation of a given HEX string.
45
37
  */
46
- export function _convertHexToBase64( hexString ) {
47
- return btoa( hexString.match( /\w{2}/g ).map( char => {
48
- return String.fromCharCode( parseInt( char, 16 ) );
49
- } ).join( '' ) );
38
+ export function _convertHexToBase64(hexString) {
39
+ return btoa(hexString.match(/\w{2}/g).map(char => {
40
+ return String.fromCharCode(parseInt(char, 16));
41
+ }).join(''));
50
42
  }
51
-
52
- // Finds all shapes (`<v:*>...</v:*>`) ids. Shapes can represent images (canvas)
53
- // or Word shapes (which does not have RTF or Blob representation).
54
- //
55
- // @param {module:engine/view/documentfragment~DocumentFragment} documentFragment Document fragment
56
- // from which to extract shape ids.
57
- // @param {module:engine/view/upcastwriter~UpcastWriter} writer
58
- // @returns {Array.<String>} Array of shape ids.
59
- function findAllShapesIds( documentFragment, writer ) {
60
- const range = writer.createRangeIn( documentFragment );
61
-
62
- const shapeElementsMatcher = new Matcher( {
63
- name: /v:(.+)/
64
- } );
65
-
66
- const shapesIds = [];
67
-
68
- for ( const value of range ) {
69
- if ( value.type != 'elementStart' ) {
70
- continue;
71
- }
72
-
73
- const el = value.item;
74
- const prevSiblingName = el.previousSibling && el.previousSibling.name || null;
75
-
76
- // If shape element have 'o:gfxdata' attribute and is not directly before `<v:shapetype>` element it means it represent Word shape.
77
- if ( shapeElementsMatcher.match( el ) && el.getAttribute( 'o:gfxdata' ) && prevSiblingName !== 'v:shapetype' ) {
78
- shapesIds.push( value.item.getAttribute( 'id' ) );
79
- }
80
- }
81
-
82
- return shapesIds;
43
+ /**
44
+ * Finds all shapes (`<v:*>...</v:*>`) ids. Shapes can represent images (canvas)
45
+ * or Word shapes (which do not have an RTF or Blob representation).
46
+ *
47
+ * @param documentFragment Document fragment from which to extract shape ids.
48
+ * @returns Array of shape ids.
49
+ */
50
+ function findAllShapesIds(documentFragment, writer) {
51
+ const range = writer.createRangeIn(documentFragment);
52
+ const shapeElementsMatcher = new Matcher({
53
+ name: /v:(.+)/
54
+ });
55
+ const shapesIds = [];
56
+ for (const value of range) {
57
+ if (value.type != 'elementStart') {
58
+ continue;
59
+ }
60
+ const el = value.item;
61
+ const previousSibling = el.previousSibling;
62
+ const prevSiblingName = previousSibling && previousSibling.is('element') ? previousSibling.name : null;
63
+ // If a shape element has the 'o:gfxdata' attribute and is not directly preceded by a `<v:shapetype>` element, it represents a Word shape.
64
+ if (shapeElementsMatcher.match(el) && el.getAttribute('o:gfxdata') && prevSiblingName !== 'v:shapetype') {
65
+ shapesIds.push(value.item.getAttribute('id'));
66
+ }
67
+ }
68
+ return shapesIds;
83
69
  }
84
-
85
- // Removes all `<img>` elements which represents Word shapes and not regular images.
86
- //
87
- // @param {Array.<String>} shapesIds Shape ids which will be checked against `<img>` elements.
88
- // @param {module:engine/view/documentfragment~DocumentFragment} documentFragment Document fragment from which to remove `<img>` elements.
89
- // @param {module:engine/view/upcastwriter~UpcastWriter} writer
90
- function removeAllImgElementsRepresentingShapes( shapesIds, documentFragment, writer ) {
91
- const range = writer.createRangeIn( documentFragment );
92
-
93
- const imageElementsMatcher = new Matcher( {
94
- name: 'img'
95
- } );
96
-
97
- const imgs = [];
98
-
99
- for ( const value of range ) {
100
- if ( imageElementsMatcher.match( value.item ) ) {
101
- const el = value.item;
102
- const shapes = el.getAttribute( 'v:shapes' ) ? el.getAttribute( 'v:shapes' ).split( ' ' ) : [];
103
-
104
- if ( shapes.length && shapes.every( shape => shapesIds.indexOf( shape ) > -1 ) ) {
105
- imgs.push( el );
106
- // Shapes may also have empty source while content is paste in some browsers (Safari).
107
- } else if ( !el.getAttribute( 'src' ) ) {
108
- imgs.push( el );
109
- }
110
- }
111
- }
112
-
113
- for ( const img of imgs ) {
114
- writer.remove( img );
115
- }
70
+ /**
71
+ * Removes all `<img>` elements that represent Word shapes rather than regular images.
72
+ *
73
+ * @param shapesIds Shape ids which will be checked against `<img>` elements.
74
+ * @param documentFragment Document fragment from which to remove `<img>` elements.
75
+ */
76
+ function removeAllImgElementsRepresentingShapes(shapesIds, documentFragment, writer) {
77
+ const range = writer.createRangeIn(documentFragment);
78
+ const imageElementsMatcher = new Matcher({
79
+ name: 'img'
80
+ });
81
+ const imgs = [];
82
+ for (const value of range) {
83
+ if (value.item.is('element') && imageElementsMatcher.match(value.item)) {
84
+ const el = value.item;
85
+ const shapes = el.getAttribute('v:shapes') ? el.getAttribute('v:shapes').split(' ') : [];
86
+ if (shapes.length && shapes.every(shape => shapesIds.indexOf(shape) > -1)) {
87
+ imgs.push(el);
88
+ // Shapes may also have an empty source when content is pasted in some browsers (Safari).
89
+ }
90
+ else if (!el.getAttribute('src')) {
91
+ imgs.push(el);
92
+ }
93
+ }
94
+ }
95
+ for (const img of imgs) {
96
+ writer.remove(img);
97
+ }
116
98
  }
117
-
118
- // Removes all shape elements (`<v:*>...</v:*>`) so they do not pollute the output structure.
119
- //
120
- // @param {module:engine/view/documentfragment~DocumentFragment} documentFragment Document fragment from which to remove shape elements.
121
- // @param {module:engine/view/upcastwriter~UpcastWriter} writer
122
- function removeAllShapeElements( documentFragment, writer ) {
123
- const range = writer.createRangeIn( documentFragment );
124
-
125
- const shapeElementsMatcher = new Matcher( {
126
- name: /v:(.+)/
127
- } );
128
-
129
- const shapes = [];
130
-
131
- for ( const value of range ) {
132
- if ( value.type == 'elementStart' && shapeElementsMatcher.match( value.item ) ) {
133
- shapes.push( value.item );
134
- }
135
- }
136
-
137
- for ( const shape of shapes ) {
138
- writer.remove( shape );
139
- }
99
+ /**
100
+ * Removes all shape elements (`<v:*>...</v:*>`) so they do not pollute the output structure.
101
+ *
102
+ * @param documentFragment Document fragment from which to remove shape elements.
103
+ */
104
+ function removeAllShapeElements(documentFragment, writer) {
105
+ const range = writer.createRangeIn(documentFragment);
106
+ const shapeElementsMatcher = new Matcher({
107
+ name: /v:(.+)/
108
+ });
109
+ const shapes = [];
110
+ for (const value of range) {
111
+ if (value.type == 'elementStart' && shapeElementsMatcher.match(value.item)) {
112
+ shapes.push(value.item);
113
+ }
114
+ }
115
+ for (const shape of shapes) {
116
+ writer.remove(shape);
117
+ }
118
+ }
119
+ /**
120
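+ * Inserts an `<img>` element directly after each `<v:shape>` whose id is not listed in `shapeIds` and which has no `<img>` with a matching `v:shapes` attribute.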
121
+ */
122
+ function insertMissingImgs(shapeIds, documentFragment, writer) {
123
+ const range = writer.createRangeIn(documentFragment);
124
+ const shapes = [];
125
+ for (const value of range) {
126
+ if (value.type == 'elementStart' && value.item.is('element', 'v:shape')) {
127
+ const id = value.item.getAttribute('id');
128
+ if (shapeIds.includes(id)) {
129
+ continue;
130
+ }
131
+ if (!containsMatchingImg(value.item.parent.getChildren(), id)) {
132
+ shapes.push(value.item);
133
+ }
134
+ }
135
+ }
136
+ for (const shape of shapes) {
137
+ const attrs = {
138
+ src: findSrc(shape)
139
+ };
140
+ if (shape.hasAttribute('alt')) {
141
+ attrs.alt = shape.getAttribute('alt');
142
+ }
143
+ const img = writer.createElement('img', attrs);
144
+ writer.insertChild(shape.index + 1, img, shape.parent);
145
+ }
146
+ function containsMatchingImg(nodes, id) {
147
+ for (const node of nodes) {
148
+ /* istanbul ignore else */
149
+ if (node.is('element')) {
150
+ if (node.name == 'img' && node.getAttribute('v:shapes') == id) {
151
+ return true;
152
+ }
153
+ if (containsMatchingImg(node.getChildren(), id)) {
154
+ return true;
155
+ }
156
+ }
157
+ }
158
+ return false;
159
+ }
160
+ function findSrc(shape) {
161
+ for (const child of shape.getChildren()) {
162
+ /* istanbul ignore else */
163
+ if (child.is('element') && child.getAttribute('src')) {
164
+ return child.getAttribute('src');
165
+ }
166
+ }
167
+ }
140
168
  }
141
-
142
- // Finds all `<img>` elements in a given document fragment which have source pointing to local `file://` resource.
143
- //
144
- // @param {module:engine/view/documentfragment~DocumentFragment} documentFragment Document fragment in which to look for `<img>` elements.
145
- // @param {module:engine/view/upcastwriter~UpcastWriter} writer
146
- // @returns {Object} result All found images grouped by source type.
147
- // @returns {Array.<module:engine/view/element~Element>} result.file Array of found `<img>` elements with `file://` source.
148
- // @returns {Array.<module:engine/view/element~Element>} result.blob Array of found `<img>` elements with `blob:` source.
149
- function findAllImageElementsWithLocalSource( documentFragment, writer ) {
150
- const range = writer.createRangeIn( documentFragment );
151
-
152
- const imageElementsMatcher = new Matcher( {
153
- name: 'img'
154
- } );
155
-
156
- const imgs = [];
157
-
158
- for ( const value of range ) {
159
- if ( imageElementsMatcher.match( value.item ) ) {
160
- if ( value.item.getAttribute( 'src' ).startsWith( 'file://' ) ) {
161
- imgs.push( value.item );
162
- }
163
- }
164
- }
165
-
166
- return imgs;
169
+ /**
170
+ * Finds all `<img>` elements in a given document fragment which have source pointing to local `file://` resource.
171
+ *
172
+ * @param documentFragment Document fragment in which to look for `<img>` elements.
173
+ * @returns Array of found `<img>` elements whose source starts with `file://`.
174
+ */
175
+ function findAllImageElementsWithLocalSource(documentFragment, writer) {
176
+ const range = writer.createRangeIn(documentFragment);
177
+ const imageElementsMatcher = new Matcher({
178
+ name: 'img'
179
+ });
180
+ const imgs = [];
181
+ for (const value of range) {
182
+ if (value.item.is('element') && imageElementsMatcher.match(value.item)) {
183
+ if (value.item.getAttribute('src').startsWith('file://')) {
184
+ imgs.push(value.item);
185
+ }
186
+ }
187
+ }
188
+ return imgs;
167
189
  }
168
-
169
- // Extracts all images HEX representations from a given RTF data.
170
- //
171
- // @param {String} rtfData The RTF data from which to extract images HEX representation.
172
- // @returns {Array.<Object>} Array of found HEX representations. Each array item is an object containing:
173
- //
174
- // * {String} hex Image representation in HEX format.
175
- // * {string} type Type of image, `image/png` or `image/jpeg`.
176
- function extractImageDataFromRtf( rtfData ) {
177
- if ( !rtfData ) {
178
- return [];
179
- }
180
-
181
- const regexPictureHeader = /{\\pict[\s\S]+?\\bliptag-?\d+(\\blipupi-?\d+)?({\\\*\\blipuid\s?[\da-fA-F]+)?[\s}]*?/;
182
- const regexPicture = new RegExp( '(?:(' + regexPictureHeader.source + '))([\\da-fA-F\\s]+)\\}', 'g' );
183
- const images = rtfData.match( regexPicture );
184
- const result = [];
185
-
186
- if ( images ) {
187
- for ( const image of images ) {
188
- let imageType = false;
189
-
190
- if ( image.includes( '\\pngblip' ) ) {
191
- imageType = 'image/png';
192
- } else if ( image.includes( '\\jpegblip' ) ) {
193
- imageType = 'image/jpeg';
194
- }
195
-
196
- if ( imageType ) {
197
- result.push( {
198
- hex: image.replace( regexPictureHeader, '' ).replace( /[^\da-fA-F]/g, '' ),
199
- type: imageType
200
- } );
201
- }
202
- }
203
- }
204
-
205
- return result;
190
+ /**
191
+ * Extracts all images HEX representations from a given RTF data.
192
+ *
193
+ * @param rtfData The RTF data from which to extract images HEX representation.
194
+ * @returns Array of found HEX representations. Each array item is an object containing:
195
+ *
196
+ * * hex Image representation in HEX format.
197
+ * * type Type of image, `image/png` or `image/jpeg`.
198
+ */
199
+ function extractImageDataFromRtf(rtfData) {
200
+ if (!rtfData) {
201
+ return [];
202
+ }
203
+ const regexPictureHeader = /{\\pict[\s\S]+?\\bliptag-?\d+(\\blipupi-?\d+)?({\\\*\\blipuid\s?[\da-fA-F]+)?[\s}]*?/;
204
+ const regexPicture = new RegExp('(?:(' + regexPictureHeader.source + '))([\\da-fA-F\\s]+)\\}', 'g');
205
+ const images = rtfData.match(regexPicture);
206
+ const result = [];
207
+ if (images) {
208
+ for (const image of images) {
209
+ let imageType = false;
210
+ if (image.includes('\\pngblip')) {
211
+ imageType = 'image/png';
212
+ }
213
+ else if (image.includes('\\jpegblip')) {
214
+ imageType = 'image/jpeg';
215
+ }
216
+ if (imageType) {
217
+ result.push({
218
+ hex: image.replace(regexPictureHeader, '').replace(/[^\da-fA-F]/g, ''),
219
+ type: imageType
220
+ });
221
+ }
222
+ }
223
+ }
224
+ return result;
206
225
  }
207
-
208
- // Replaces `src` attribute value of all given images with the corresponding base64 image representation.
209
- //
210
- // @param {Array.<module:engine/view/element~Element>} imageElements Array of image elements which will have its source replaced.
211
- // @param {Array.<Object>} imagesHexSources Array of images hex sources (usually the result of `extractImageDataFromRtf()` function).
212
- // The array should be the same length as `imageElements` parameter.
213
- // @param {module:engine/view/upcastwriter~UpcastWriter} writer
214
- function replaceImagesFileSourceWithInlineRepresentation( imageElements, imagesHexSources, writer ) {
215
- // Assume there is an equal amount of image elements and images HEX sources so they can be matched accordingly based on existing order.
216
- if ( imageElements.length === imagesHexSources.length ) {
217
- for ( let i = 0; i < imageElements.length; i++ ) {
218
- const newSrc = `data:${ imagesHexSources[ i ].type };base64,${ _convertHexToBase64( imagesHexSources[ i ].hex ) }`;
219
- writer.setAttribute( 'src', newSrc, imageElements[ i ] );
220
- }
221
- }
226
+ /**
227
+ * Replaces `src` attribute value of all given images with the corresponding base64 image representation.
228
+ *
229
+ * @param imageElements Array of image elements which will have their source replaced.
230
+ * @param imagesHexSources Array of images hex sources (usually the result of `extractImageDataFromRtf()` function).
231
+ * The array should be the same length as `imageElements` parameter.
232
+ */
233
+ function replaceImagesFileSourceWithInlineRepresentation(imageElements, imagesHexSources, writer) {
234
+ // Assume there is an equal amount of image elements and images HEX sources so they can be matched accordingly based on existing order.
235
+ if (imageElements.length === imagesHexSources.length) {
236
+ for (let i = 0; i < imageElements.length; i++) {
237
+ const newSrc = `data:${imagesHexSources[i].type};base64,${_convertHexToBase64(imagesHexSources[i].hex)}`;
238
+ writer.setAttribute('src', newSrc, imageElements[i]);
239
+ }
240
+ }
222
241
  }