@adobe/helix-importer 2.9.41 → 3.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +19 -0
- package/README.md +5 -38
- package/package.json +13 -6
- package/src/importer/HTML2x.js +19 -30
- package/src/importer/PageImporter.js +12 -4
- package/src/importer/PageImporterParams.js +2 -0
- package/src/importer/defaults/generateDocumentPath.js +24 -0
- package/src/importer/defaults/rules/adjustImageUrls.js +28 -0
- package/src/importer/defaults/rules/convertIcons.js +25 -0
- package/src/importer/defaults/rules/createMetadata.js +102 -0
- package/src/importer/defaults/rules/transformBackgroundImages.js +21 -0
- package/src/importer/defaults/transformDOM.js +42 -0
- package/src/index.js +13 -11
- package/src/utils/BrowserUtils.js +29 -0
- package/src/utils/DOMUtils.js +19 -13
- package/src/wp/WPUtils.js +1 -3
- package/{src/explorer/PagingExplorerParams.js → test/TestUtils.js} +8 -4
- package/test/browser/BrowserUtils.test.js +42 -0
- package/test/browser/DOMUtils.test.js +67 -0
- package/test/importers/HTML2x.spec.js +122 -38
- package/test/importers/PageImporter.spec.js +37 -2
- package/test/importers/defaults/fixtures/adjust-image-urls.expected.html +7 -0
- package/test/importers/defaults/fixtures/adjust-image-urls.input.html +10 -0
- package/test/importers/defaults/fixtures/background-image.expected.html +13 -0
- package/test/importers/defaults/fixtures/background-image.input.html +10 -0
- package/test/importers/defaults/fixtures/cleanup.expected.html +5 -0
- package/test/importers/defaults/fixtures/cleanup.input.html +11 -0
- package/test/importers/defaults/fixtures/default.expected.html +4 -0
- package/test/importers/defaults/fixtures/default.input.html +6 -0
- package/test/importers/defaults/fixtures/icons.expected.html +4 -0
- package/test/importers/defaults/fixtures/icons.input.html +6 -0
- package/test/importers/defaults/fixtures/metadata.all.diff.expected.html +40 -0
- package/test/importers/defaults/fixtures/metadata.all.diff.input.html +17 -0
- package/test/importers/defaults/fixtures/metadata.all.same.expected.html +20 -0
- package/test/importers/defaults/fixtures/metadata.all.same.input.html +17 -0
- package/test/importers/defaults/fixtures/metadata.basic.expected.html +16 -0
- package/test/importers/defaults/fixtures/metadata.basic.input.html +9 -0
- package/test/importers/defaults/fixtures/metadata.image.expected.html +12 -0
- package/test/importers/defaults/fixtures/metadata.image.input.html +9 -0
- package/test/importers/defaults/fixtures/metadata.og.expected.html +16 -0
- package/test/importers/defaults/fixtures/metadata.og.input.html +9 -0
- package/test/importers/defaults/fixtures/metadata.twitter.expected.html +16 -0
- package/test/importers/defaults/fixtures/metadata.twitter.input.html +9 -0
- package/test/importers/defaults/generateDocumentPath.spec.js +32 -0
- package/test/importers/defaults/transformDOM.spec.js +94 -0
- package/test/importers/fixtures/video.spec.html +11 -0
- package/test/importers/fixtures/video.spec.md +7 -0
- package/test/utils/DOMUtils.spec.js +23 -4
- package/src/explorer/PagingExplorer.js +0 -81
- package/src/wp/explorers/WPAdminAjaxPager.js +0 -51
- package/src/wp/explorers/WPContentPager.js +0 -48
- package/src/wp/explorers/WPPostWrapPager.js +0 -43
- package/test/explorers/PagingExplorer.spec.js +0 -280
package/src/utils/DOMUtils.js
CHANGED
|
@@ -10,11 +10,15 @@
|
|
|
10
10
|
* governing permissions and limitations under the License.
|
|
11
11
|
*/
|
|
12
12
|
|
|
13
|
-
import { JSDOM } from 'jsdom';
|
|
14
|
-
|
|
15
13
|
export default class DOMUtils {
|
|
16
14
|
static EMPTY_TAGS_TO_PRESERVE = ['img', 'video', 'iframe', 'div', 'picture'];
|
|
17
15
|
|
|
16
|
+
static fragment(document, string) {
|
|
17
|
+
const tpl = document.createElement('template');
|
|
18
|
+
tpl.innerHTML = string;
|
|
19
|
+
return tpl.content;
|
|
20
|
+
}
|
|
21
|
+
|
|
18
22
|
static reviewInlineElement(document, tagName) {
|
|
19
23
|
let tags = [...document.querySelectorAll(tagName)];
|
|
20
24
|
// first pass, remove empty nodes
|
|
@@ -48,7 +52,7 @@ export default class DOMUtils {
|
|
|
48
52
|
for (let i = tags.length - 1; i >= 0; i -= 1) {
|
|
49
53
|
const tag = tags[i];
|
|
50
54
|
if (tag.innerHTML === '.' || tag.innerHTML === '. ' || tag.innerHTML === ':' || tag.innerHTML === ': ') {
|
|
51
|
-
tag.replaceWith(
|
|
55
|
+
tag.replaceWith(DOMUtils.fragment(document, tag.innerHTML));
|
|
52
56
|
} else {
|
|
53
57
|
const { innerHTML } = tag;
|
|
54
58
|
if (tag.previousSibling) {
|
|
@@ -82,13 +86,13 @@ export default class DOMUtils {
|
|
|
82
86
|
// move trailing space to a new text node outside of current element
|
|
83
87
|
tag.innerHTML = innerHTML.slice(0, innerHTML.length - 1);
|
|
84
88
|
({ innerHTML } = tag);
|
|
85
|
-
tag.after(
|
|
89
|
+
tag.after(DOMUtils.fragment(document, '<span> </span>'));
|
|
86
90
|
}
|
|
87
91
|
|
|
88
92
|
if (innerHTML.indexOf(' ') === 0) {
|
|
89
93
|
// move leading space to a new text node outside of current element
|
|
90
94
|
tag.innerHTML = innerHTML.slice(1);
|
|
91
|
-
tag.before(
|
|
95
|
+
tag.before(DOMUtils.fragment(document, '<span> </span>'));
|
|
92
96
|
}
|
|
93
97
|
}
|
|
94
98
|
}
|
|
@@ -146,7 +150,7 @@ export default class DOMUtils {
|
|
|
146
150
|
if (span.textContent === '') {
|
|
147
151
|
span.remove();
|
|
148
152
|
} else {
|
|
149
|
-
span.replaceWith(
|
|
153
|
+
span.replaceWith(DOMUtils.fragment(document, span.innerHTML));
|
|
150
154
|
}
|
|
151
155
|
}
|
|
152
156
|
});
|
|
@@ -156,7 +160,7 @@ export default class DOMUtils {
|
|
|
156
160
|
selectors.forEach((selector) => {
|
|
157
161
|
document.querySelectorAll(selector).forEach((elem) => {
|
|
158
162
|
const captionText = elem.textContent.trim();
|
|
159
|
-
elem.parentNode.insertBefore(
|
|
163
|
+
elem.parentNode.insertBefore(DOMUtils.fragment(document, `<p><em>${captionText}</em></p>`), elem);
|
|
160
164
|
elem.remove();
|
|
161
165
|
});
|
|
162
166
|
});
|
|
@@ -198,8 +202,8 @@ export default class DOMUtils {
|
|
|
198
202
|
return table;
|
|
199
203
|
}
|
|
200
204
|
|
|
201
|
-
static generateEmbed(url) {
|
|
202
|
-
return
|
|
205
|
+
static generateEmbed(document, url) {
|
|
206
|
+
return DOMUtils.fragment(document, `<table><tr><th>Embed</th></tr><tr><td><a href="${url}">${url}</a></td></tr></table>`);
|
|
203
207
|
}
|
|
204
208
|
|
|
205
209
|
static replaceEmbeds(document) {
|
|
@@ -208,7 +212,7 @@ export default class DOMUtils {
|
|
|
208
212
|
const dataSrc = iframe.getAttribute('data-src');
|
|
209
213
|
const url = dataSrc || src;
|
|
210
214
|
if (url) {
|
|
211
|
-
iframe.after(DOMUtils.generateEmbed(url));
|
|
215
|
+
iframe.after(DOMUtils.generateEmbed(document, url));
|
|
212
216
|
}
|
|
213
217
|
iframe.remove();
|
|
214
218
|
});
|
|
@@ -218,7 +222,7 @@ export default class DOMUtils {
|
|
|
218
222
|
if (video.autoplay) {
|
|
219
223
|
blockType = 'Animation';
|
|
220
224
|
}
|
|
221
|
-
const anim =
|
|
225
|
+
const anim = DOMUtils.fragment(document, `<table><tr><th>${blockType}</th></tr><tr><td>${video.outerHTML}</td></tr></table>`);
|
|
222
226
|
video.replaceWith(anim);
|
|
223
227
|
});
|
|
224
228
|
}
|
|
@@ -254,7 +258,9 @@ export default class DOMUtils {
|
|
|
254
258
|
const styleAttr = element?.getAttribute('style')?.split(';');
|
|
255
259
|
if (styleAttr) {
|
|
256
260
|
styleAttr.forEach((style) => {
|
|
257
|
-
const
|
|
261
|
+
const split = style.split(':');
|
|
262
|
+
const prop = split.shift();
|
|
263
|
+
const value = split.join(':').trim();
|
|
258
264
|
if (prop === 'background-image') {
|
|
259
265
|
const trimmedValue = value.replace(/\s/g, '');
|
|
260
266
|
const elStyle = element.style;
|
|
@@ -263,7 +269,7 @@ export default class DOMUtils {
|
|
|
263
269
|
});
|
|
264
270
|
const url = element.style.backgroundImage;
|
|
265
271
|
if (url && url.toLowerCase() !== 'none') {
|
|
266
|
-
const src = url.replace(/url\(/gm, '').replace(/'/gm, '').replace(/\)/gm, '');
|
|
272
|
+
const src = url.replace(/url\(/gm, '').replace(/'/gm, '').replace(/"/gm, '').replace(/\)/gm, '');
|
|
267
273
|
const img = document.createElement('img');
|
|
268
274
|
img.src = src;
|
|
269
275
|
return img;
|
package/src/wp/WPUtils.js
CHANGED
|
@@ -9,8 +9,6 @@
|
|
|
9
9
|
* OF ANY KIND, either express or implied. See the License for the specific language
|
|
10
10
|
* governing permissions and limitations under the License.
|
|
11
11
|
*/
|
|
12
|
-
import { JSDOM } from 'jsdom';
|
|
13
|
-
|
|
14
12
|
import DOMUtils from '../utils/DOMUtils.js';
|
|
15
13
|
|
|
16
14
|
export default class WPUtils {
|
|
@@ -27,7 +25,7 @@ export default class WPUtils {
|
|
|
27
25
|
? item.parentNode.nextElementSibling
|
|
28
26
|
: item.nextElementSibling;
|
|
29
27
|
const captionText = elem.textContent.trim();
|
|
30
|
-
elem.parentNode.insertBefore(
|
|
28
|
+
elem.parentNode.insertBefore(DOMUtils.fragment(document, `<p><em>${captionText}</em></p>`), elem);
|
|
31
29
|
elem.remove();
|
|
32
30
|
}
|
|
33
31
|
});
|
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
/*
|
|
2
|
-
* Copyright
|
|
2
|
+
* Copyright 2023 Adobe. All rights reserved.
|
|
3
3
|
* This file is licensed to you under the Apache License, Version 2.0 (the "License");
|
|
4
4
|
* you may not use this file except in compliance with the License. You may obtain a copy
|
|
5
5
|
* of the License at http://www.apache.org/licenses/LICENSE-2.0
|
|
@@ -10,8 +10,12 @@
|
|
|
10
10
|
* governing permissions and limitations under the License.
|
|
11
11
|
*/
|
|
12
12
|
|
|
13
|
-
|
|
14
|
-
nbMaxPages;
|
|
13
|
+
import { JSDOM } from 'jsdom';
|
|
15
14
|
|
|
16
|
-
|
|
15
|
+
export default class TestUtils {
|
|
16
|
+
// test environment createDocumentFromString version using JSDOM
|
|
17
|
+
static createDocumentFromString(html) {
|
|
18
|
+
const { document } = new JSDOM(html, { runScripts: undefined }).window;
|
|
19
|
+
return document;
|
|
20
|
+
}
|
|
17
21
|
}
|
|
@@ -0,0 +1,42 @@
|
|
|
1
|
+
/*
|
|
2
|
+
* Copyright 2023 Adobe. All rights reserved.
|
|
3
|
+
* This file is licensed to you under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
* you may not use this file except in compliance with the License. You may obtain a copy
|
|
5
|
+
* of the License at http://www.apache.org/licenses/LICENSE-2.0
|
|
6
|
+
*
|
|
7
|
+
* Unless required by applicable law or agreed to in writing, software distributed under
|
|
8
|
+
* the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR REPRESENTATIONS
|
|
9
|
+
* OF ANY KIND, either express or implied. See the License for the specific language
|
|
10
|
+
* governing permissions and limitations under the License.
|
|
11
|
+
*/
|
|
12
|
+
/* eslint-env mocha */
|
|
13
|
+
/* global Document, HTMLElement */
|
|
14
|
+
|
|
15
|
+
import { expect } from '@esm-bundle/chai';
|
|
16
|
+
import BrowserUtils from '../../src/utils/BrowserUtils.js';
|
|
17
|
+
|
|
18
|
+
describe('BrowserUtils#createDocumentFromString', () => {
|
|
19
|
+
it('createDocumentFromString can parse a simple string', () => {
|
|
20
|
+
const document = BrowserUtils.createDocumentFromString('<html><head><title>Test</title></head><body><h1>Hello World</h1></body></html>');
|
|
21
|
+
expect(document).to.be.an.instanceof(Document);
|
|
22
|
+
expect(document.documentElement).to.be.an.instanceof(HTMLElement);
|
|
23
|
+
|
|
24
|
+
const title = document.querySelector('title');
|
|
25
|
+
expect(title).to.be.an.instanceof(HTMLElement);
|
|
26
|
+
expect(title.textContent).to.equal('Test');
|
|
27
|
+
|
|
28
|
+
const h1 = document.querySelector('h1');
|
|
29
|
+
expect(h1).to.be.an.instanceof(HTMLElement);
|
|
30
|
+
expect(h1.textContent).to.equal('Hello World');
|
|
31
|
+
});
|
|
32
|
+
|
|
33
|
+
it('createDocumentFromString can parse a non document string', () => {
|
|
34
|
+
const document = BrowserUtils.createDocumentFromString('<h1>Hello World</h1>');
|
|
35
|
+
expect(document).to.be.an.instanceof(Document);
|
|
36
|
+
expect(document.documentElement).to.be.an.instanceof(HTMLElement);
|
|
37
|
+
|
|
38
|
+
const h1 = document.querySelector('h1');
|
|
39
|
+
expect(h1).to.be.an.instanceof(HTMLElement);
|
|
40
|
+
expect(h1.textContent).to.equal('Hello World');
|
|
41
|
+
});
|
|
42
|
+
});
|
|
@@ -0,0 +1,67 @@
|
|
|
1
|
+
/*
|
|
2
|
+
* Copyright 2023 Adobe. All rights reserved.
|
|
3
|
+
* This file is licensed to you under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
* you may not use this file except in compliance with the License. You may obtain a copy
|
|
5
|
+
* of the License at http://www.apache.org/licenses/LICENSE-2.0
|
|
6
|
+
*
|
|
7
|
+
* Unless required by applicable law or agreed to in writing, software distributed under
|
|
8
|
+
* the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR REPRESENTATIONS
|
|
9
|
+
* OF ANY KIND, either express or implied. See the License for the specific language
|
|
10
|
+
* governing permissions and limitations under the License.
|
|
11
|
+
*/
|
|
12
|
+
/* eslint-env mocha */
|
|
13
|
+
/* eslint-disable no-unused-expressions */
|
|
14
|
+
|
|
15
|
+
import { expect } from '@esm-bundle/chai';
|
|
16
|
+
import BrowserUtils from '../../src/utils/BrowserUtils.js';
|
|
17
|
+
import DOMUtils from '../../src/utils/DOMUtils.js';
|
|
18
|
+
|
|
19
|
+
const createElement = (document, tag, attrs, styles, innerHTML) => {
|
|
20
|
+
const element = document.createElement(tag);
|
|
21
|
+
// eslint-disable-next-line no-restricted-syntax, guard-for-in
|
|
22
|
+
for (const a in attrs) {
|
|
23
|
+
element.setAttribute(a, attrs[a]);
|
|
24
|
+
}
|
|
25
|
+
// eslint-disable-next-line no-restricted-syntax, guard-for-in
|
|
26
|
+
for (const p in styles) {
|
|
27
|
+
element.style[p] = styles[p];
|
|
28
|
+
}
|
|
29
|
+
element.innerHTML = innerHTML;
|
|
30
|
+
return element;
|
|
31
|
+
};
|
|
32
|
+
|
|
33
|
+
describe('DOMUtils#element', () => {
|
|
34
|
+
const test = (tag, attrs, styles, innerHTML, expected) => {
|
|
35
|
+
const document = BrowserUtils.createDocumentFromString('<html><body></body></html>');
|
|
36
|
+
const element = createElement(document, tag, attrs, styles, innerHTML);
|
|
37
|
+
const ret = DOMUtils.getImgFromBackground(element, document);
|
|
38
|
+
if (expected) {
|
|
39
|
+
expect(ret).to.not.be.null;
|
|
40
|
+
expect(ret.outerHTML).to.equal(expected);
|
|
41
|
+
} else {
|
|
42
|
+
expect(ret).to.be.null;
|
|
43
|
+
}
|
|
44
|
+
};
|
|
45
|
+
|
|
46
|
+
it('no background-image style', () => {
|
|
47
|
+
test('p', {}, {}, 'Some content', null);
|
|
48
|
+
test('img', { src: 'https://www.server.com/image.jpg', title: 'Some title' }, {}, '', null);
|
|
49
|
+
test('p', {}, { 'background-image': 'none' }, 'Some content', null);
|
|
50
|
+
});
|
|
51
|
+
|
|
52
|
+
it('with background-image style', () => {
|
|
53
|
+
test('p', {}, { 'background-image': 'url(https://www.server.com/image.jpg)' }, 'Some content', '<img src="https://www.server.com/image.jpg">');
|
|
54
|
+
test('p', {}, { 'background-image': 'url("https://www.server.com/image.jpg")' }, 'Some content', '<img src="https://www.server.com/image.jpg">');
|
|
55
|
+
test('p', {}, { 'background-image': 'url(\'https://www.server.com/image.jpg\')' }, 'Some content', '<img src="https://www.server.com/image.jpg">');
|
|
56
|
+
test('p', {}, { 'background-image': 'url(http://localhost:3001/image.jpg)' }, 'Some content', '<img src="http://localhost:3001/image.jpg">');
|
|
57
|
+
});
|
|
58
|
+
|
|
59
|
+
// `createElement` uses JSDOM to create the test-DOM
|
|
60
|
+
// the workaround in DOMUtils#getImgFromBackground exists _precisely_
|
|
61
|
+
// because of a potential bug in JSDOM due to which it doesn't
|
|
62
|
+
// parse `url()` with whitespaces correctly
|
|
63
|
+
// browser specific version of the test
|
|
64
|
+
it('with background-image style containing whitespace in url()', () => {
|
|
65
|
+
test('p', {}, { 'background-image': 'url( /image.jpg )' }, 'Some content', '<img src="/image.jpg">');
|
|
66
|
+
});
|
|
67
|
+
});
|
|
@@ -10,9 +10,10 @@
|
|
|
10
10
|
* governing permissions and limitations under the License.
|
|
11
11
|
*/
|
|
12
12
|
|
|
13
|
-
import {
|
|
13
|
+
import {
|
|
14
|
+
deepStrictEqual, ok, strictEqual, fail,
|
|
15
|
+
} from 'assert';
|
|
14
16
|
import { describe, it } from 'mocha';
|
|
15
|
-
import { JSDOM } from 'jsdom';
|
|
16
17
|
import { docx2md } from '@adobe/helix-docx2md';
|
|
17
18
|
import MockMediaHandler from '../mocks/MockMediaHandler.js';
|
|
18
19
|
|
|
@@ -20,33 +21,11 @@ import DOMUtils from '../../src/utils/DOMUtils.js';
|
|
|
20
21
|
import {
|
|
21
22
|
html2md,
|
|
22
23
|
html2docx,
|
|
23
|
-
defaultGenerateDocumentPath,
|
|
24
|
-
defaultTransformDOM,
|
|
25
24
|
} from '../../src/importer/HTML2x.js';
|
|
26
25
|
|
|
27
|
-
|
|
28
|
-
it('default transformation', async () => {
|
|
29
|
-
const { document } = new JSDOM('<html><body><h1>Hello World</h1></body></html>', { runScripts: undefined }).window;
|
|
30
|
-
const out = await defaultTransformDOM({ document });
|
|
31
|
-
strictEqual(out.outerHTML, '<body><h1>Hello World</h1></body>');
|
|
32
|
-
});
|
|
33
|
-
});
|
|
26
|
+
import TestUtils from '../TestUtils.js';
|
|
34
27
|
|
|
35
|
-
|
|
36
|
-
it('default paths', async () => {
|
|
37
|
-
strictEqual(await defaultGenerateDocumentPath({ url: 'https://wwww.sample.com' }), '/index');
|
|
38
|
-
strictEqual(await defaultGenerateDocumentPath({ url: 'https://wwww.sample.com/' }), '/index');
|
|
39
|
-
strictEqual(await defaultGenerateDocumentPath({ url: 'https://wwww.sample.com/index.html' }), '/index');
|
|
40
|
-
strictEqual(await defaultGenerateDocumentPath({ url: 'https://wwww.sample.com/index' }), '/index');
|
|
41
|
-
strictEqual(await defaultGenerateDocumentPath({ url: 'https://wwww.sample.com/page' }), '/page');
|
|
42
|
-
strictEqual(await defaultGenerateDocumentPath({ url: 'https://wwww.sample.com/page.html' }), '/page');
|
|
43
|
-
strictEqual(await defaultGenerateDocumentPath({ url: 'https://wwww.sample.com/folder/page' }), '/folder/page');
|
|
44
|
-
strictEqual(await defaultGenerateDocumentPath({ url: 'https://wwww.sample.com/folder/page.html' }), '/folder/page');
|
|
45
|
-
strictEqual(await defaultGenerateDocumentPath({ url: 'https://wwww.sample.com/folder/page/' }), '/folder/page/index');
|
|
46
|
-
strictEqual(await defaultGenerateDocumentPath({ url: 'https://wwww.sample.com/folder/page with spaces.html' }), '/folder/page-with-spaces');
|
|
47
|
-
strictEqual(await defaultGenerateDocumentPath({ url: 'https://wwww.sample.com/folder/PagE_with_3xtr4_charactére.html' }), '/folder/page-with-3xtr4-charact-re');
|
|
48
|
-
});
|
|
49
|
-
});
|
|
28
|
+
const { createDocumentFromString } = TestUtils;
|
|
50
29
|
|
|
51
30
|
describe('html2x parameters', () => {
|
|
52
31
|
const URL = 'https://www.sample.com/page.html';
|
|
@@ -73,7 +52,9 @@ describe('html2x parameters', () => {
|
|
|
73
52
|
transformDOM: testParams,
|
|
74
53
|
generateDocumentPath: testParams,
|
|
75
54
|
preprocess: testParams,
|
|
76
|
-
},
|
|
55
|
+
}, {
|
|
56
|
+
createDocumentFromString,
|
|
57
|
+
}, {
|
|
77
58
|
originalURL: ORIGNAL_URL,
|
|
78
59
|
});
|
|
79
60
|
|
|
@@ -81,7 +62,9 @@ describe('html2x parameters', () => {
|
|
|
81
62
|
transformDOM: testParams,
|
|
82
63
|
generateDocumentPath: testParams,
|
|
83
64
|
preprocess: testParams,
|
|
84
|
-
},
|
|
65
|
+
}, {
|
|
66
|
+
createDocumentFromString,
|
|
67
|
+
}, {
|
|
85
68
|
originalURL: ORIGNAL_URL,
|
|
86
69
|
});
|
|
87
70
|
});
|
|
@@ -90,22 +73,94 @@ describe('html2x parameters', () => {
|
|
|
90
73
|
await html2md(URL, HTML, {
|
|
91
74
|
transform: testParams,
|
|
92
75
|
preprocess: testParams,
|
|
93
|
-
},
|
|
76
|
+
}, {
|
|
77
|
+
createDocumentFromString,
|
|
78
|
+
}, {
|
|
94
79
|
originalURL: ORIGNAL_URL,
|
|
95
80
|
});
|
|
96
81
|
|
|
97
82
|
await html2docx(URL, HTML, {
|
|
98
83
|
transform: testParams,
|
|
99
84
|
preprocess: testParams,
|
|
100
|
-
},
|
|
85
|
+
}, {
|
|
86
|
+
createDocumentFromString,
|
|
87
|
+
}, {
|
|
88
|
+
originalURL: ORIGNAL_URL,
|
|
89
|
+
});
|
|
90
|
+
});
|
|
91
|
+
|
|
92
|
+
it('document can be a Document', async () => {
|
|
93
|
+
const doc = createDocumentFromString(HTML);
|
|
94
|
+
await html2md(URL, doc, {
|
|
95
|
+
transformDOM: testParams,
|
|
96
|
+
generateDocumentPath: testParams,
|
|
97
|
+
preprocess: testParams,
|
|
98
|
+
}, {
|
|
99
|
+
createDocumentFromString,
|
|
100
|
+
}, {
|
|
101
|
+
originalURL: ORIGNAL_URL,
|
|
102
|
+
});
|
|
103
|
+
|
|
104
|
+
await html2docx(URL, doc, {
|
|
105
|
+
transformDOM: testParams,
|
|
106
|
+
generateDocumentPath: testParams,
|
|
107
|
+
preprocess: testParams,
|
|
108
|
+
}, {
|
|
109
|
+
createDocumentFromString,
|
|
110
|
+
}, {
|
|
101
111
|
originalURL: ORIGNAL_URL,
|
|
102
112
|
});
|
|
103
113
|
});
|
|
114
|
+
|
|
115
|
+
it('document cannot be a string in the testing context', async () => {
|
|
116
|
+
// we need JSDOM to create a document
|
|
117
|
+
// because importer default implementation relies on DOMParser
|
|
118
|
+
try {
|
|
119
|
+
await html2md(URL, HTML, {
|
|
120
|
+
transformDOM: testParams,
|
|
121
|
+
generateDocumentPath: testParams,
|
|
122
|
+
preprocess: testParams,
|
|
123
|
+
}, {
|
|
124
|
+
createDocumentFromString: null,
|
|
125
|
+
}, {
|
|
126
|
+
originalURL: ORIGNAL_URL,
|
|
127
|
+
});
|
|
128
|
+
fail('should have thrown an error: default createDocumentFromString works only in browser context');
|
|
129
|
+
} catch (e) {
|
|
130
|
+
ok(true);
|
|
131
|
+
}
|
|
132
|
+
|
|
133
|
+
try {
|
|
134
|
+
await html2docx(URL, HTML, {
|
|
135
|
+
transformDOM: testParams,
|
|
136
|
+
generateDocumentPath: testParams,
|
|
137
|
+
preprocess: testParams,
|
|
138
|
+
}, {
|
|
139
|
+
createDocumentFromString: null,
|
|
140
|
+
}, {
|
|
141
|
+
originalURL: ORIGNAL_URL,
|
|
142
|
+
});
|
|
143
|
+
fail('should have thrown an error: default createDocumentFromString works only in browser context');
|
|
144
|
+
} catch (e) {
|
|
145
|
+
ok(true);
|
|
146
|
+
}
|
|
147
|
+
});
|
|
104
148
|
});
|
|
105
149
|
|
|
106
150
|
describe('html2md tests', () => {
|
|
107
151
|
it('html2md provides a default transformation', async () => {
|
|
108
|
-
const out = await html2md('https://www.sample.com/page.html', '<html><body><h1>Hello World</h1></body></html>');
|
|
152
|
+
const out = await html2md('https://www.sample.com/page.html', '<html><body><h1>Hello World</h1></body></html>', null, {
|
|
153
|
+
createDocumentFromString,
|
|
154
|
+
});
|
|
155
|
+
strictEqual(out.html.trim(), '<body><h1>Hello World</h1></body>');
|
|
156
|
+
strictEqual(out.md.trim(), '# Hello World');
|
|
157
|
+
strictEqual(out.path, '/page');
|
|
158
|
+
});
|
|
159
|
+
|
|
160
|
+
it('html2md accepts a string', async () => {
|
|
161
|
+
const out = await html2md('https://www.sample.com/page.html', '<html><body><h1>Hello World</h1></body></html>', null, {
|
|
162
|
+
createDocumentFromString,
|
|
163
|
+
});
|
|
109
164
|
strictEqual(out.html.trim(), '<body><h1>Hello World</h1></body>');
|
|
110
165
|
strictEqual(out.md.trim(), '# Hello World');
|
|
111
166
|
strictEqual(out.path, '/page');
|
|
@@ -119,6 +174,8 @@ describe('html2md tests', () => {
|
|
|
119
174
|
return p;
|
|
120
175
|
},
|
|
121
176
|
generateDocumentPath: () => '/folder/my-custom-path',
|
|
177
|
+
}, {
|
|
178
|
+
createDocumentFromString,
|
|
122
179
|
});
|
|
123
180
|
strictEqual(out.html.trim(), '<p>My Hello to the World</p>');
|
|
124
181
|
strictEqual(out.md.trim(), 'My Hello to the World');
|
|
@@ -142,6 +199,8 @@ describe('html2md tests', () => {
|
|
|
142
199
|
path: '/folder/my-custom-path-p2',
|
|
143
200
|
}];
|
|
144
201
|
},
|
|
202
|
+
}, {
|
|
203
|
+
createDocumentFromString,
|
|
145
204
|
});
|
|
146
205
|
|
|
147
206
|
const out1 = out[0];
|
|
@@ -169,6 +228,8 @@ describe('html2md tests', () => {
|
|
|
169
228
|
path: '/my-custom-path-p1',
|
|
170
229
|
};
|
|
171
230
|
},
|
|
231
|
+
}, {
|
|
232
|
+
createDocumentFromString,
|
|
172
233
|
});
|
|
173
234
|
|
|
174
235
|
strictEqual(out.html.trim(), '<p>My Hello to the World 1</p>');
|
|
@@ -209,6 +270,8 @@ describe('html2md tests', () => {
|
|
|
209
270
|
},
|
|
210
271
|
}];
|
|
211
272
|
},
|
|
273
|
+
}, {
|
|
274
|
+
createDocumentFromString,
|
|
212
275
|
});
|
|
213
276
|
|
|
214
277
|
const out1 = out[0];
|
|
@@ -247,6 +310,8 @@ describe('html2md tests', () => {
|
|
|
247
310
|
},
|
|
248
311
|
},
|
|
249
312
|
}],
|
|
313
|
+
}, {
|
|
314
|
+
createDocumentFromString,
|
|
250
315
|
});
|
|
251
316
|
|
|
252
317
|
// if no element provided, no creation of html, md or docx
|
|
@@ -264,6 +329,8 @@ describe('html2md tests', () => {
|
|
|
264
329
|
it('html2md does not crash if transform returns null', async () => {
|
|
265
330
|
const out = await html2md('https://www.sample.com/page.html', '<html><body><h1>Hello World</h1></body></html>', {
|
|
266
331
|
transform: () => null,
|
|
332
|
+
}, {
|
|
333
|
+
createDocumentFromString,
|
|
267
334
|
});
|
|
268
335
|
|
|
269
336
|
strictEqual(out.length, 0);
|
|
@@ -273,6 +340,8 @@ describe('html2md tests', () => {
|
|
|
273
340
|
const out = await html2md('https://www.sample.com/page.html', '<html><body><h1>Hello World</h1></body></html>', {
|
|
274
341
|
transformDOM: () => null,
|
|
275
342
|
generateDocumentPath: () => null,
|
|
343
|
+
}, {
|
|
344
|
+
createDocumentFromString,
|
|
276
345
|
});
|
|
277
346
|
strictEqual(out.html.trim(), '<body><h1>Hello World</h1></body>');
|
|
278
347
|
strictEqual(out.md.trim(), '# Hello World');
|
|
@@ -291,20 +360,27 @@ describe('html2md tests', () => {
|
|
|
291
360
|
return document.body;
|
|
292
361
|
},
|
|
293
362
|
},
|
|
363
|
+
{
|
|
364
|
+
createDocumentFromString,
|
|
365
|
+
},
|
|
294
366
|
);
|
|
295
367
|
strictEqual(out.html.trim(), '<body><img src="./image.png"></body>');
|
|
296
368
|
});
|
|
297
369
|
|
|
298
370
|
it('html2md removes images with src attributes', async () => {
|
|
299
|
-
const out = await html2md('https://www.sample.com/page.html', '<html><body><img src="data:abc"></body></html>');
|
|
371
|
+
const out = await html2md('https://www.sample.com/page.html', '<html><body><img src="data:abc"></body></html>', null, {
|
|
372
|
+
createDocumentFromString,
|
|
373
|
+
});
|
|
300
374
|
strictEqual(out.html.trim(), '<body></body>');
|
|
301
375
|
strictEqual(out.md.trim(), '');
|
|
302
376
|
});
|
|
303
377
|
|
|
304
378
|
it('html2md set image src with data-src attribute value', async () => {
|
|
305
|
-
const out = await html2md('https://www.sample.com/page.html', '<html><body><img src="data:abc" data-src="./image.jpg"></body></html>'
|
|
306
|
-
|
|
307
|
-
|
|
379
|
+
const out = await html2md('https://www.sample.com/page.html', '<html><body><img src="data:abc" data-src="./image.jpg"></body></html>', null, {
|
|
380
|
+
createDocumentFromString,
|
|
381
|
+
});
|
|
382
|
+
strictEqual(out.html.trim(), '<body><img src="https://www.sample.com/image.jpg" data-src="./image.jpg"></body>');
|
|
383
|
+
strictEqual(out.md.trim(), '![][image0]\n\n[image0]: https://www.sample.com/image.jpg');
|
|
308
384
|
});
|
|
309
385
|
|
|
310
386
|
it('html2md allows to preprocess the document', async () => {
|
|
@@ -314,9 +390,11 @@ describe('html2md tests', () => {
|
|
|
314
390
|
img.setAttribute('src', img.getAttribute('data-fancy-src'));
|
|
315
391
|
img.removeAttribute('data-fancy-src');
|
|
316
392
|
},
|
|
393
|
+
}, {
|
|
394
|
+
createDocumentFromString,
|
|
317
395
|
});
|
|
318
|
-
strictEqual(out.html.trim(), '<body><img src="
|
|
319
|
-
strictEqual(out.md.trim(), '![][image0]\n\n[image0]:
|
|
396
|
+
strictEqual(out.html.trim(), '<body><img src="https://www.sample.com/image.jpg"></body>');
|
|
397
|
+
strictEqual(out.md.trim(), '![][image0]\n\n[image0]: https://www.sample.com/image.jpg');
|
|
320
398
|
});
|
|
321
399
|
|
|
322
400
|
it('html2md removes original hrs but keeps md section breaks', async () => {
|
|
@@ -330,6 +408,8 @@ describe('html2md tests', () => {
|
|
|
330
408
|
p.after(hr);
|
|
331
409
|
return document.body;
|
|
332
410
|
},
|
|
411
|
+
}, {
|
|
412
|
+
createDocumentFromString,
|
|
333
413
|
});
|
|
334
414
|
strictEqual(out.html.trim(), '<body><p>text 1</p><p>text 2</p><p>text 3</p><p>text 4</p><hr></body>');
|
|
335
415
|
strictEqual(out.md.trim(), 'text 1\n\ntext 2\n\ntext 3\n\ntext 4\n\n---');
|
|
@@ -338,7 +418,9 @@ describe('html2md tests', () => {
|
|
|
338
418
|
|
|
339
419
|
describe('html2docx tests', () => {
|
|
340
420
|
it('html2docx provides a default transformation', async () => {
|
|
341
|
-
const out = await html2docx('https://www.sample.com/page.html', '<html><body><h1>Hello World</h1></body></html>');
|
|
421
|
+
const out = await html2docx('https://www.sample.com/page.html', '<html><body><h1>Hello World</h1></body></html>', null, {
|
|
422
|
+
createDocumentFromString,
|
|
423
|
+
});
|
|
342
424
|
strictEqual(out.html.trim(), '<body><h1>Hello World</h1></body>');
|
|
343
425
|
strictEqual(out.md.trim(), '# Hello World');
|
|
344
426
|
strictEqual(out.path, '/page');
|
|
@@ -357,6 +439,8 @@ describe('html2docx tests', () => {
|
|
|
357
439
|
return p;
|
|
358
440
|
},
|
|
359
441
|
generateDocumentPath: () => '/folder1/folder2/my-custom-path',
|
|
442
|
+
}, {
|
|
443
|
+
createDocumentFromString,
|
|
360
444
|
});
|
|
361
445
|
strictEqual(out.html.trim(), '<p>My Hello to the World</p>');
|
|
362
446
|
strictEqual(out.md.trim(), 'My Hello to the World');
|
|
@@ -14,13 +14,15 @@
|
|
|
14
14
|
|
|
15
15
|
import path from 'path';
|
|
16
16
|
import fs from 'fs-extra';
|
|
17
|
-
import { strictEqual, ok } from 'assert';
|
|
17
|
+
import { strictEqual, ok, fail } from 'assert';
|
|
18
18
|
import { describe, it } from 'mocha';
|
|
19
19
|
import { Response } from 'node-fetch';
|
|
20
20
|
import { dirname } from 'dirname-filename-esm';
|
|
21
21
|
|
|
22
22
|
import { docx2md } from '@adobe/helix-docx2md';
|
|
23
23
|
|
|
24
|
+
import { JSDOM } from 'jsdom';
|
|
25
|
+
|
|
24
26
|
import { unified } from 'unified';
|
|
25
27
|
import remarkParse from 'remark-parse';
|
|
26
28
|
import remarkGridTable from '@adobe/remark-gridtables';
|
|
@@ -38,6 +40,12 @@ const __dirname = dirname(import.meta);
|
|
|
38
40
|
|
|
39
41
|
const logger = new NoopLogger();
|
|
40
42
|
|
|
43
|
+
// test environment createDocumentFromString version using JSDOM
|
|
44
|
+
const createDocumentFromString = (html) => {
|
|
45
|
+
const { document } = new JSDOM(html, { runScripts: undefined }).window;
|
|
46
|
+
return document;
|
|
47
|
+
};
|
|
48
|
+
|
|
41
49
|
describe('PageImporter tests', () => {
|
|
42
50
|
const storageHandler = new MemoryHandler(logger);
|
|
43
51
|
const config = {
|
|
@@ -52,11 +60,31 @@ describe('PageImporter tests', () => {
|
|
|
52
60
|
}
|
|
53
61
|
}
|
|
54
62
|
|
|
55
|
-
const se = new TestImporter(config);
|
|
63
|
+
const se = new TestImporter({
|
|
64
|
+
createDocumentFromString,
|
|
65
|
+
...config,
|
|
66
|
+
});
|
|
56
67
|
const results = await se.import('someurl');
|
|
57
68
|
|
|
58
69
|
strictEqual(results.length, 0, 'expect no result');
|
|
59
70
|
});
|
|
71
|
+
|
|
72
|
+
it('import - not providing createDocumentFromString should fail in the test environment only', async () => {
|
|
73
|
+
class TestImporter extends PageImporter {
|
|
74
|
+
async fetch() {
|
|
75
|
+
return new Response('test');
|
|
76
|
+
}
|
|
77
|
+
}
|
|
78
|
+
|
|
79
|
+
const se = new TestImporter(config);
|
|
80
|
+
|
|
81
|
+
try {
|
|
82
|
+
await se.import('someurl');
|
|
83
|
+
fail('should have thrown an error: default createDocumentFromString works only in browser context');
|
|
84
|
+
} catch (e) {
|
|
85
|
+
ok(true);
|
|
86
|
+
}
|
|
87
|
+
});
|
|
60
88
|
});
|
|
61
89
|
|
|
62
90
|
describe('PageImporter tests - various options', () => {
|
|
@@ -76,6 +104,7 @@ describe('PageImporter tests - various options', () => {
|
|
|
76
104
|
const config = {
|
|
77
105
|
storageHandler,
|
|
78
106
|
logger,
|
|
107
|
+
createDocumentFromString,
|
|
79
108
|
};
|
|
80
109
|
const se = new Test(config);
|
|
81
110
|
const results = await se.import('/someurl');
|
|
@@ -105,6 +134,7 @@ describe('PageImporter tests - various options', () => {
|
|
|
105
134
|
mdast2Docx2Options: {
|
|
106
135
|
stylesXML,
|
|
107
136
|
},
|
|
137
|
+
createDocumentFromString,
|
|
108
138
|
};
|
|
109
139
|
const se = new Test(config);
|
|
110
140
|
const results = await se.import('/someurl');
|
|
@@ -144,6 +174,7 @@ describe('PageImporter tests - fixtures', () => {
|
|
|
144
174
|
storageHandler,
|
|
145
175
|
skipDocxConversion: true,
|
|
146
176
|
logger,
|
|
177
|
+
createDocumentFromString,
|
|
147
178
|
};
|
|
148
179
|
const se = new Test(config);
|
|
149
180
|
const results = await se.import(`https://www.sample.com/${feature}`);
|
|
@@ -206,4 +237,8 @@ describe('PageImporter tests - fixtures', () => {
|
|
|
206
237
|
it('import - sub and sup', async () => {
|
|
207
238
|
await featureTest('subsup');
|
|
208
239
|
});
|
|
240
|
+
|
|
241
|
+
it('import - video', async () => {
|
|
242
|
+
await featureTest('video');
|
|
243
|
+
});
|
|
209
244
|
});
|