@adobe/helix-importer 2.9.0 → 2.9.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/CHANGELOG.md CHANGED
@@ -1,3 +1,17 @@
1
+ ## [2.9.2](https://github.com/adobe/helix-importer/compare/v2.9.1...v2.9.2) (2023-05-04)
2
+
3
+
4
+ ### Bug Fixes
5
+
6
+ * replace nbsp with space ([#142](https://github.com/adobe/helix-importer/issues/142)) ([f093c74](https://github.com/adobe/helix-importer/commit/f093c749be0dcd75ec3a2b44313346fac2f3ede4))
7
+
8
+ ## [2.9.1](https://github.com/adobe/helix-importer/compare/v2.9.0...v2.9.1) (2023-04-22)
9
+
10
+
11
+ ### Bug Fixes
12
+
13
+ * **deps:** update adobe fixes ([#139](https://github.com/adobe/helix-importer/issues/139)) ([7cf1d8c](https://github.com/adobe/helix-importer/commit/7cf1d8cfc72f854b1c1268a4360592ae783ada1e))
14
+
1
15
  # [2.9.0](https://github.com/adobe/helix-importer/compare/v2.8.13...v2.9.0) (2023-04-20)
2
16
 
3
17
 
package/README.md CHANGED
@@ -34,7 +34,7 @@ async function main() {
34
34
  }
35
35
  ```
36
36
 
37
- In this example, the [WPContentPager](./src/wp/explorers/WPContentPager.ts) extends the [PagignExplorer](src/explorer/PagingExplorer.ts) which implements the 2 methods:
37
+ In this example, the [WPContentPager](./src/wp/explorers/WPContentPager.ts) extends the [PagingExplorer](src/explorer/PagingExplorer.ts) which implements the 2 methods:
38
38
  - `fetch` which defines how to fetch one page on results
39
39
  - `explore` which extracts the list of urls present on that page
40
40
 
@@ -71,4 +71,4 @@ TODO: publish npm module
71
71
 
72
72
  ```js
73
73
  import { ... } from '@adobe/helix-importer';
74
- ```
74
+ ```
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@adobe/helix-importer",
3
- "version": "2.9.0",
3
+ "version": "2.9.2",
4
4
  "description": "Helix Importer tool: create md / docx from html",
5
5
  "main": "src/index.js",
6
6
  "type": "module",
@@ -27,24 +27,24 @@
27
27
  "devDependencies": {
28
28
  "@adobe/eslint-config-helix": "2.0.2",
29
29
  "@adobe/helix-docx2md": "1.4.1",
30
- "@adobe/helix-mediahandler": "2.1.5",
30
+ "@adobe/helix-mediahandler": "2.1.7",
31
31
  "@semantic-release/changelog": "6.0.3",
32
32
  "@semantic-release/exec": "6.0.3",
33
33
  "@semantic-release/git": "10.0.1",
34
34
  "c8": "7.13.0",
35
35
  "dirname-filename-esm": "1.1.1",
36
- "eslint": "8.38.0",
36
+ "eslint": "8.39.0",
37
37
  "husky": "8.0.3",
38
- "lint-staged": "13.2.1",
38
+ "lint-staged": "13.2.2",
39
39
  "mocha": "10.2.0",
40
40
  "mocha-multi-reporters": "1.5.1",
41
41
  "mock-fs": "5.2.0",
42
- "semantic-release": "21.0.1"
42
+ "semantic-release": "21.0.2"
43
43
  },
44
44
  "license": "Apache-2.0",
45
45
  "dependencies": {
46
46
  "@adobe/helix-markdown-support": "6.1.1",
47
- "@adobe/helix-md2docx": "2.0.50",
47
+ "@adobe/helix-md2docx": "2.0.51",
48
48
  "@adobe/mdast-util-gridtables": "1.0.6",
49
49
  "@adobe/remark-gridtables": "1.0.2",
50
50
  "form-data": "4.0.0",
@@ -103,10 +103,12 @@ export default class DOMUtils {
103
103
  (tag.textContent === ''
104
104
  || tag.textContent === ' '
105
105
  || tag.textContent === '&nbsp;'
106
- || tag.textContent.charCodeAt(0) === 160)
106
+ || (tag.textContent.charCodeAt(0) === 160 && tag.textContent.length === 1))
107
107
  && !tag.querySelector(DOMUtils.EMPTY_TAGS_TO_PRESERVE.join(','))
108
108
  ) {
109
109
  tag.remove();
110
+ } else {
111
+ tag.innerHTML = tag.innerHTML.replace(/&nbsp;/gm, ' ');
110
112
  }
111
113
  }
112
114
  }
@@ -145,7 +147,7 @@ export default class DOMUtils {
145
147
  // remove spans
146
148
  document.querySelectorAll('span').forEach((span) => {
147
149
  // do not touch spans with images and span with a css class or an id
148
- if (!span.querySelector('img') && span.classList.length === 0 && !span.id) {
150
+ if (!span.querySelector('img') && span.classList.length === 0 && !span.id && !span.getAttribute('style')) {
149
151
  if (span.textContent === '') {
150
152
  span.remove();
151
153
  } else {
@@ -4,10 +4,11 @@
4
4
  <p>A simple paragraph</p>
5
5
  <p>A paragraph with a br inside.<br> This should be next line.</p>
6
6
  <p>A paragraph with a br at the end.<br></p>
7
+ <p>A paragraph with a br at the end and &amp;nbsp; "&nbsp;".<br> &nbsp;</p>
7
8
  <p>A paragraph followed by a br</p>
8
9
  <br>
9
10
  <p>A paragraph after the br</p>
10
- &nbsp;
11
+ <p>&nbsp;</p>
11
12
  <p>A paragraph after the nbsp;</p>
12
13
  </body>
13
14
  </html>
@@ -7,6 +7,8 @@ This should be next line.
7
7
 
8
8
  A paragraph with a br at the end.
9
9
 
10
+ A paragraph with a br at the end and \&nbsp; " ".
11
+
10
12
  A paragraph followed by a br
11
13
 
12
14
  \
@@ -14,6 +16,4 @@ A paragraph followed by a br
14
16
 
15
17
  A paragraph after the br
16
18
 
17
-
18
-
19
19
  A paragraph after the nbsp;
@@ -95,6 +95,11 @@ describe('DOMUtils#reviewParagraphs tests', () => {
95
95
  test('<p><video width="320" height="240" controls=""><source src="movie.mp4" type="video/mp4"></video></p>', '<p><video width="320" height="240" controls=""><source src="movie.mp4" type="video/mp4"></video></p>');
96
96
  test('<p><iframe src="www.iframe.com"></iframe></p>', '<p><iframe src="www.iframe.com"></iframe></p>');
97
97
  });
98
+
99
+ it('reviewParagraphs replaces &nbsp; with spaces', () => {
100
+ test('<p>usefull with space&nbsp;</p>', '<p>usefull with space </p>');
101
+ test('<p>&nbsp;more&nbsp;spaces&nbsp;<br> &nbsp;</p>', '<p> more spaces <br> </p>');
102
+ });
98
103
  });
99
104
 
100
105
  describe('DOMUtils#reviewHeadings tests', () => {
@@ -193,6 +198,9 @@ describe('DOMUtils#removeSpans tests', () => {
193
198
  test('<div>Spans potentially used to do layouting: <span class="tab1">tab1</span><span class="tab2">tab2</span></div>', '<div>Spans potentially used to do layouting: <span class="tab1">tab1</span><span class="tab2">tab2</span></div>');
194
199
  test('<div>Spans potentially used to do layouting: <span id="tab1">tab1</span><span id="tab2">tab2</span></div>', '<div>Spans potentially used to do layouting: <span id="tab1">tab1</span><span id="tab2">tab2</span></div>');
195
200
  });
201
+ it('keeps styled spans', () => {
202
+ test('<p><span style="text-decoration: underline;">This should be underlined.</span></p>', '<p><span style="text-decoration: underline;">This should be underlined.</span></p>');
203
+ });
196
204
  });
197
205
 
198
206
  describe('DOMUtils#removeNoscripts tests', () => {