@adobe/helix-importer 2.2.3 → 2.3.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/CHANGELOG.md CHANGED
@@ -1,3 +1,17 @@
1
+ ## [2.3.1](https://github.com/adobe/helix-importer/compare/v2.3.0...v2.3.1) (2022-11-23)
2
+
3
+
4
+ ### Bug Fixes
5
+
6
+ * **deps:** update dependency @adobe/helix-md2docx to v2.0.24 ([#43](https://github.com/adobe/helix-importer/issues/43)) ([fd2bc21](https://github.com/adobe/helix-importer/commit/fd2bc213d9463dfd9c250211eba5d68ae4fc674a))
7
+
8
+ # [2.3.0](https://github.com/adobe/helix-importer/compare/v2.2.3...v2.3.0) (2022-11-23)
9
+
10
+
11
+ ### Features
12
+
13
+ * give access to the original dom, before any preprocessing ([#41](https://github.com/adobe/helix-importer/issues/41)) ([6c394c5](https://github.com/adobe/helix-importer/commit/6c394c5991b66765e43c04d4083a5879d6669f4e))
14
+
1
15
  ## [2.2.3](https://github.com/adobe/helix-importer/compare/v2.2.2...v2.2.3) (2022-11-20)
2
16
 
3
17
 
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@adobe/helix-importer",
3
- "version": "2.2.3",
3
+ "version": "2.3.1",
4
4
  "description": "Helix Importer tool: create md / docx from html",
5
5
  "main": "src/index.js",
6
6
  "type": "module",
@@ -26,7 +26,7 @@
26
26
  },
27
27
  "devDependencies": {
28
28
  "@adobe/eslint-config-helix": "1.3.2",
29
- "@adobe/helix-docx2md": "1.3.6",
29
+ "@adobe/helix-docx2md": "1.3.7",
30
30
  "@adobe/helix-mediahandler": "1.2.17",
31
31
  "@semantic-release/changelog": "6.0.1",
32
32
  "@semantic-release/exec": "6.0.3",
@@ -47,7 +47,7 @@
47
47
  "license": "Apache-2.0",
48
48
  "dependencies": {
49
49
  "@adobe/helix-markdown-support": "6.0.0",
50
- "@adobe/helix-md2docx": "2.0.22",
50
+ "@adobe/helix-md2docx": "2.0.24",
51
51
  "@adobe/mdast-util-gridtables": "1.0.3",
52
52
  "@adobe/remark-gridtables": "1.0.0",
53
53
  "form-data": "4.0.0",
@@ -21,7 +21,7 @@ import Utils from '../utils/Utils.js';
21
21
 
22
22
  // import docxStylesXML from '../resources/styles.xml';
23
23
 
24
- function preprocessDOM(document) {
24
+ function setBackgroundImagesFromCSS(document) {
25
25
  const elements = document.querySelectorAll('body, header, footer, div, span, section, main');
26
26
  const getComputedStyle = document.defaultView?.getComputedStyle;
27
27
  if (getComputedStyle) {
@@ -76,8 +76,19 @@ async function html2x(
76
76
  }
77
77
  }
78
78
 
79
- if (config.preprocess !== false) {
80
- preprocessDOM(doc);
79
+ // for more advanced use cases, give access to the original dom with
80
+ // no preprocessing at all
81
+ if (transformer.preprocess) {
82
+ transformer.preprocess({
83
+ url,
84
+ document: doc,
85
+ html: doc.documentElement.outerHTML,
86
+ params,
87
+ });
88
+ }
89
+
90
+ if (config.setBackgroundImagesFromCSS !== false) {
91
+ setBackgroundImagesFromCSS(doc);
81
92
  }
82
93
 
83
94
  const html = doc.documentElement.outerHTML;
@@ -72,6 +72,7 @@ describe('html2x parameters', () => {
72
72
  await html2md(URL, HTML, {
73
73
  transformDOM: testParams,
74
74
  generateDocumentPath: testParams,
75
+ preprocess: testParams,
75
76
  }, null, {
76
77
  originalURL: ORIGNAL_URL,
77
78
  });
@@ -79,6 +80,7 @@ describe('html2x parameters', () => {
79
80
  await html2docx(URL, HTML, {
80
81
  transformDOM: testParams,
81
82
  generateDocumentPath: testParams,
83
+ preprocess: testParams,
82
84
  }, null, {
83
85
  originalURL: ORIGNAL_URL,
84
86
  });
@@ -87,12 +89,14 @@ describe('html2x parameters', () => {
87
89
  it('parameters are correctly passed in multi mode', async () => {
88
90
  await html2md(URL, HTML, {
89
91
  transform: testParams,
92
+ preprocess: testParams,
90
93
  }, null, {
91
94
  originalURL: ORIGNAL_URL,
92
95
  });
93
96
 
94
97
  await html2docx(URL, HTML, {
95
98
  transform: testParams,
99
+ preprocess: testParams,
96
100
  }, null, {
97
101
  originalURL: ORIGNAL_URL,
98
102
  });
@@ -205,6 +209,30 @@ describe('html2md tests', () => {
205
209
  );
206
210
  strictEqual(out.html.trim(), '<body><img src="./image.png"></body>');
207
211
  });
212
+
213
+ it('html2md removes images with src attributes', async () => {
214
+ const out = await html2md('https://www.sample.com/page.html', '<html><body><img src="data:abc"></body></html>');
215
+ strictEqual(out.html.trim(), '<body></body>');
216
+ strictEqual(out.md.trim(), '');
217
+ });
218
+
219
+ it('html2md set image src with data-src attribute value', async () => {
220
+ const out = await html2md('https://www.sample.com/page.html', '<html><body><img src="data:abc" data-src="./image.jpg"></body></html>');
221
+ strictEqual(out.html.trim(), '<body><img src="./image.jpg" data-src="./image.jpg"></body>');
222
+ strictEqual(out.md.trim(), '![][image0]\n\n[image0]: ./image.jpg');
223
+ });
224
+
225
+ it('html2md allows to preprocess the document', async () => {
226
+ const out = await html2md('https://www.sample.com/page.html', '<html><body><img src="data:abc" data-fancy-src="./image.jpg"></body></html>', {
227
+ preprocess: ({ document }) => {
228
+ const img = document.querySelector('img');
229
+ img.setAttribute('src', img.getAttribute('data-fancy-src'));
230
+ img.removeAttribute('data-fancy-src');
231
+ },
232
+ });
233
+ strictEqual(out.html.trim(), '<body><img src="./image.jpg"></body>');
234
+ strictEqual(out.md.trim(), '![][image0]\n\n[image0]: ./image.jpg');
235
+ });
208
236
  });
209
237
 
210
238
  describe('html2docx tests', () => {