@adobe/helix-importer 1.13.1 → 1.14.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +7 -0
- package/README.md +15 -4
- package/package.json +1 -1
- package/src/importer/HTML2x.js +128 -51
- package/src/importer/PageImporter.js +3 -1
- package/test/importers/HTML2x.spec.js +111 -1
- package/test/importers/PageImporter.spec.js +6 -1
- package/test/importers/fixtures/space.spec.html +13 -0
- package/test/importers/fixtures/space.spec.md +19 -0
package/CHANGELOG.md
CHANGED
|
@@ -1,3 +1,10 @@
|
|
|
1
|
+
# [1.14.0](https://github.com/adobe/helix-importer/compare/v1.13.1...v1.14.0) (2022-07-06)
|
|
2
|
+
|
|
3
|
+
|
|
4
|
+
### Features
|
|
5
|
+
|
|
6
|
+
* import one document -> multiple output ([f0b688e](https://github.com/adobe/helix-importer/commit/f0b688ebc9dbb68a9981c45c99bad9fa106d4376))
|
|
7
|
+
|
|
1
8
|
## [1.13.1](https://github.com/adobe/helix-importer/compare/v1.13.0...v1.13.1) (2022-05-23)
|
|
2
9
|
|
|
3
10
|
|
package/README.md
CHANGED
|
@@ -15,7 +15,7 @@ Idea of an explorer is to crawl the site in order to collect a list of urls. Thi
|
|
|
15
15
|
|
|
16
16
|
Here is a basic sample:
|
|
17
17
|
|
|
18
|
-
```
|
|
18
|
+
```js
|
|
19
19
|
|
|
20
20
|
import { WPContentPager, FSHandler, CSV } from '@adobe/helix-importer';
|
|
21
21
|
|
|
@@ -42,11 +42,22 @@ The final result is a list of urls that could be found on list of paged results
|
|
|
42
42
|
|
|
43
43
|
## Importer
|
|
44
44
|
|
|
45
|
-
An importer must extends [PageImporter](src/importer/PageImporter.
|
|
45
|
+
An importer must extend [PageImporter](src/importer/PageImporter.js) and implement the `fetch` and `process` methods. The general idea is that `fetch` receives the URL to import and is responsible for returning the HTML. `process` receives the corresponding Document in order to filter / rearrange / reshuffle the DOM before it gets processed by the Markdown transformer. `process` computes and defines the list of [PageImporterResource](src/importer/PageImporterResource.ts) (could be more than one), each resource being transformed into a Markdown document.
|
|
46
46
|
|
|
47
47
|
Goal of the importer is to get rid of the generic DOM elements like the header / footer, the nav... and all elements that are common to all pages in order to get the unique piece(s) of content per page.
|
|
48
48
|
|
|
49
|
-
|
|
49
|
+
### HTML2x helpers
|
|
50
|
+
|
|
51
|
+
[HTML2x](src/importer/HTML2x.js) methods (`html2md` and `html2docx`) are convenience methods to run an import. As input, they take:
|
|
52
|
+
- `URL`: URL of the page to import
|
|
53
|
+
- `document`: the DOM element to import
|
|
54
|
+
- `transformCfg`: object with the transformation "rules". Object can be either:
|
|
55
|
+
- `{ transformDOM: ({ url, document, html, params }) => { ... return element-to-convert }, generateDocumentPath: ({ url, document, html, params }) => { ... return path-to-target; }}` for a single mapping between one input document / one output file
|
|
56
|
+
- `{ transform: ({ url, document, html, params }) => { ... return [{ element: first-element-to-convert, path: first-path-to-target }, ...]; }}` for a mapping between one input document and multiple output files (useful to generate multiple docx from a single web page)
|
|
57
|
+
|
|
58
|
+
### Importer UI
|
|
59
|
+
|
|
60
|
+
The Helix Importer has a dedicated browser UI: see https://github.com/adobe/helix-importer-ui
|
|
50
61
|
|
|
51
62
|
## Installation
|
|
52
63
|
|
|
@@ -58,6 +69,6 @@ TODO: publish npm module
|
|
|
58
69
|
|
|
59
70
|
## Usage
|
|
60
71
|
|
|
61
|
-
```
|
|
72
|
+
```js
|
|
62
73
|
import { ... } from '@adobe/helix-importer';
|
|
63
74
|
```
|
package/package.json
CHANGED
package/src/importer/HTML2x.js
CHANGED
|
@@ -17,6 +17,7 @@ import { JSDOM } from 'jsdom';
|
|
|
17
17
|
import PageImporter from './PageImporter.js';
|
|
18
18
|
import PageImporterResource from './PageImporterResource.js';
|
|
19
19
|
import MemoryHandler from '../storage/MemoryHandler.js';
|
|
20
|
+
import Utils from '../utils/Utils.js';
|
|
20
21
|
|
|
21
22
|
// import docxStylesXML from '../resources/styles.xml';
|
|
22
23
|
|
|
@@ -35,13 +36,17 @@ function preprocessDOM(document) {
|
|
|
35
36
|
}
|
|
36
37
|
}
|
|
37
38
|
|
|
38
|
-
|
|
39
|
-
|
|
39
|
+
async function defaultTransformDOM({
|
|
40
|
+
// eslint-disable-next-line no-unused-vars
|
|
41
|
+
url, document, html, params,
|
|
42
|
+
}) {
|
|
40
43
|
return document.body;
|
|
41
44
|
}
|
|
42
45
|
|
|
43
|
-
|
|
44
|
-
|
|
46
|
+
async function defaultGenerateDocumentPath({
|
|
47
|
+
// eslint-disable-next-line no-unused-vars
|
|
48
|
+
url, document, html, params,
|
|
49
|
+
}) {
|
|
45
50
|
let p = new URL(url).pathname;
|
|
46
51
|
if (p.endsWith('/')) {
|
|
47
52
|
p = `${p}index`;
|
|
@@ -52,21 +57,26 @@ async function defaultGenerateDocumentPath({ url, document }) {
|
|
|
52
57
|
.replace(/[^a-z0-9/]/gm, '-');
|
|
53
58
|
}
|
|
54
59
|
|
|
55
|
-
async function html2x(
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
60
|
+
async function html2x(
|
|
61
|
+
url,
|
|
62
|
+
doc,
|
|
63
|
+
transformCfg,
|
|
64
|
+
config = { toMd: true, toDocx: false },
|
|
65
|
+
params = {},
|
|
66
|
+
) {
|
|
67
|
+
const transformer = transformCfg || {};
|
|
68
|
+
|
|
69
|
+
if (!transformer.transform) {
|
|
70
|
+
if (!transformer.transformDOM) {
|
|
71
|
+
transformer.transformDOM = defaultTransformDOM;
|
|
72
|
+
}
|
|
64
73
|
|
|
65
|
-
|
|
66
|
-
|
|
74
|
+
if (!transformer.generateDocumentPath) {
|
|
75
|
+
transformer.generateDocumentPath = defaultGenerateDocumentPath;
|
|
76
|
+
}
|
|
67
77
|
}
|
|
68
78
|
|
|
69
|
-
if (
|
|
79
|
+
if (config.preprocess !== false) {
|
|
70
80
|
preprocessDOM(doc);
|
|
71
81
|
}
|
|
72
82
|
|
|
@@ -77,22 +87,59 @@ async function html2x(url, doc, transformCfg, toMd, toDocx, options = {}) {
|
|
|
77
87
|
}
|
|
78
88
|
|
|
79
89
|
async process(document) {
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
90
|
+
if (transformer.transform) {
|
|
91
|
+
let results = transformer.transform({
|
|
92
|
+
url,
|
|
93
|
+
document,
|
|
94
|
+
html,
|
|
95
|
+
params,
|
|
96
|
+
});
|
|
97
|
+
if (!results) return null;
|
|
98
|
+
const pirs = [];
|
|
99
|
+
|
|
100
|
+
if (!Array.isArray(results)) {
|
|
101
|
+
// single element with transform function
|
|
102
|
+
results = [results];
|
|
103
|
+
}
|
|
104
|
+
|
|
105
|
+
results.forEach((result) => {
|
|
106
|
+
const name = path.basename(result.path);
|
|
107
|
+
const dirname = path.dirname(result.path);
|
|
108
|
+
|
|
109
|
+
const pir = new PageImporterResource(name, dirname, result.element, null, {
|
|
110
|
+
html: result.element.outerHTML,
|
|
111
|
+
});
|
|
112
|
+
pirs.push(pir);
|
|
113
|
+
});
|
|
114
|
+
return pirs;
|
|
115
|
+
} else {
|
|
116
|
+
let output = await transformer.transformDOM({
|
|
117
|
+
url,
|
|
118
|
+
document,
|
|
119
|
+
html,
|
|
120
|
+
params,
|
|
121
|
+
});
|
|
122
|
+
output = output || document.body;
|
|
123
|
+
|
|
124
|
+
let p = await transformer.generateDocumentPath({
|
|
125
|
+
url,
|
|
126
|
+
document,
|
|
127
|
+
html,
|
|
128
|
+
params,
|
|
129
|
+
});
|
|
130
|
+
if (!p) {
|
|
131
|
+
// provided function returns null -> apply default
|
|
132
|
+
p = await defaultGenerateDocumentPath({ url, document });
|
|
133
|
+
}
|
|
134
|
+
|
|
135
|
+
const name = path.basename(p);
|
|
136
|
+
const dirname = path.dirname(p);
|
|
137
|
+
|
|
138
|
+
const pir = new PageImporterResource(name, dirname, output, null, {
|
|
139
|
+
html: output.outerHTML,
|
|
140
|
+
});
|
|
141
|
+
return [pir];
|
|
87
142
|
}
|
|
88
|
-
|
|
89
|
-
name = path.basename(p);
|
|
90
|
-
dirname = path.dirname(p);
|
|
91
|
-
|
|
92
|
-
const pir = new PageImporterResource(name, dirname, output, null, {
|
|
93
|
-
html: output.outerHTML,
|
|
94
|
-
});
|
|
95
|
-
return [pir];
|
|
96
143
|
}
|
|
97
144
|
}
|
|
98
145
|
|
|
@@ -107,48 +154,78 @@ async function html2x(url, doc, transformCfg, toMd, toDocx, options = {}) {
|
|
|
107
154
|
const storageHandler = new MemoryHandler(logger);
|
|
108
155
|
const importer = new InternalImporter({
|
|
109
156
|
storageHandler,
|
|
110
|
-
skipDocxConversion: !toDocx,
|
|
111
|
-
skipMDFileCreation: !toMd,
|
|
157
|
+
skipDocxConversion: !config.toDocx,
|
|
158
|
+
skipMDFileCreation: !config.toMd,
|
|
112
159
|
logger,
|
|
113
160
|
mdast2docxOptions: {
|
|
114
|
-
stylesXML:
|
|
115
|
-
svg2png:
|
|
161
|
+
stylesXML: config.docxStylesXML,
|
|
162
|
+
svg2png: config.svg2png,
|
|
116
163
|
},
|
|
117
164
|
});
|
|
118
165
|
|
|
119
166
|
const pirs = await importer.import(url);
|
|
120
167
|
|
|
121
|
-
const
|
|
122
|
-
|
|
123
|
-
|
|
168
|
+
const getResponseObjectFromPIR = async (pir) => {
|
|
169
|
+
const res = {
|
|
170
|
+
html: pir.extra.html,
|
|
171
|
+
};
|
|
124
172
|
|
|
125
|
-
|
|
173
|
+
res.path = path.resolve(pir.directory, pir.name);
|
|
126
174
|
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
175
|
+
if (config.toMd) {
|
|
176
|
+
const md = await storageHandler.get(pir.md);
|
|
177
|
+
res.md = md;
|
|
178
|
+
}
|
|
179
|
+
if (config.toDocx) {
|
|
180
|
+
const docx = await storageHandler.get(pir.docx);
|
|
181
|
+
res.docx = docx;
|
|
182
|
+
}
|
|
183
|
+
return res;
|
|
184
|
+
};
|
|
185
|
+
|
|
186
|
+
if (pirs.length === 1) {
|
|
187
|
+
return getResponseObjectFromPIR(pirs[0]);
|
|
188
|
+
} else {
|
|
189
|
+
const res = [];
|
|
190
|
+
await Utils.asyncForEach(pirs, async (pir) => {
|
|
191
|
+
res.push(await getResponseObjectFromPIR(pir));
|
|
192
|
+
});
|
|
193
|
+
return res;
|
|
134
194
|
}
|
|
135
|
-
return res;
|
|
136
195
|
}
|
|
137
196
|
|
|
138
|
-
|
|
197
|
+
/**
|
|
198
|
+
* Returns the result of the conversion from html to md.
|
|
199
|
+
* @param {string} url URL of the document to convert
|
|
200
|
+
* @param {HTMLElement|string} document Document to convert
|
|
201
|
+
* @param {Object} transformCfg Conversion configuration
|
|
202
|
+
* @param {Object} config Conversion configuration.
|
|
203
|
+
* @param {Object} params Conversion params. Object will be passed to the transformer functions.
|
|
204
|
+
* @returns {Object|Array} Result(s) of the conversion
|
|
205
|
+
*/
|
|
206
|
+
async function html2md(url, document, transformCfg, config, params = {}) {
|
|
139
207
|
let doc = document;
|
|
140
208
|
if (typeof document === 'string') {
|
|
141
209
|
doc = new JSDOM(document, { runScripts: undefined }).window.document;
|
|
142
210
|
}
|
|
143
|
-
return html2x(url, doc, transformCfg, true, false,
|
|
211
|
+
return html2x(url, doc, transformCfg, { ...config, toMd: true, toDocx: false }, params);
|
|
144
212
|
}
|
|
145
213
|
|
|
146
|
-
|
|
214
|
+
/**
|
|
215
|
+
* Returns the result of the conversion from html to docx.
|
|
216
|
+
* @param {string} url URL of the document to convert
|
|
217
|
+
* @param {HTMLElement|string} document Document to convert
|
|
218
|
+
* @param {Object} transformCfg Conversion configuration
|
|
219
|
+
* @param {Object} config Conversion configuration.
|
|
220
|
+
* @param {Object} params Conversion params. Object will be passed to the transformer functions.
|
|
221
|
+
* @returns {Object|Array} Result(s) of the conversion
|
|
222
|
+
*/
|
|
223
|
+
async function html2docx(url, document, transformCfg, config, params = {}) {
|
|
147
224
|
let doc = document;
|
|
148
225
|
if (typeof document === 'string') {
|
|
149
226
|
doc = new JSDOM(document, { runScripts: undefined }).window.document;
|
|
150
227
|
}
|
|
151
|
-
return html2x(url, doc, transformCfg, true, true,
|
|
228
|
+
return html2x(url, doc, transformCfg, { ...config, toMd: true, toDocx: true }, params);
|
|
152
229
|
}
|
|
153
230
|
|
|
154
231
|
export {
|
|
@@ -160,7 +160,7 @@ export default class PageImporter {
|
|
|
160
160
|
contents = this.postProcessMD(contents);
|
|
161
161
|
|
|
162
162
|
return {
|
|
163
|
-
path:
|
|
163
|
+
path: path.join(directory, sanitizedName),
|
|
164
164
|
content: contents,
|
|
165
165
|
};
|
|
166
166
|
}
|
|
@@ -296,6 +296,8 @@ export default class PageImporter {
|
|
|
296
296
|
// eslint-disable-next-line no-param-reassign
|
|
297
297
|
entry.source = url;
|
|
298
298
|
// eslint-disable-next-line no-param-reassign
|
|
299
|
+
entry.path = res.path;
|
|
300
|
+
// eslint-disable-next-line no-param-reassign
|
|
299
301
|
entry.markdown = res.content;
|
|
300
302
|
|
|
301
303
|
if (!this.params.skipMDFileCreation) {
|
|
@@ -48,6 +48,57 @@ describe('defaultGenerateDocumentPath tests', () => {
|
|
|
48
48
|
});
|
|
49
49
|
});
|
|
50
50
|
|
|
51
|
+
describe('html2x parameters', () => {
|
|
52
|
+
const URL = 'https://www.sample.com/page.html';
|
|
53
|
+
const ORIGNAL_URL = 'https://www.notproxyurl.com/folder/page.html';
|
|
54
|
+
const HTML = '<html><head></head><body><h1>Hello World</h1></body></html>';
|
|
55
|
+
|
|
56
|
+
const testParams = ({
|
|
57
|
+
url,
|
|
58
|
+
document,
|
|
59
|
+
html,
|
|
60
|
+
params,
|
|
61
|
+
}) => {
|
|
62
|
+
strictEqual(url, URL);
|
|
63
|
+
strictEqual(params.originalURL, ORIGNAL_URL);
|
|
64
|
+
strictEqual(html, HTML);
|
|
65
|
+
|
|
66
|
+
const h1 = document.querySelector('h1');
|
|
67
|
+
ok(h1);
|
|
68
|
+
strictEqual(h1.textContent, 'Hello World');
|
|
69
|
+
};
|
|
70
|
+
|
|
71
|
+
it('parameters are correctly passed in single mode', async () => {
|
|
72
|
+
await html2md(URL, HTML, {
|
|
73
|
+
transformDOM: testParams,
|
|
74
|
+
generateDocumentPath: testParams,
|
|
75
|
+
}, null, {
|
|
76
|
+
originalURL: ORIGNAL_URL,
|
|
77
|
+
});
|
|
78
|
+
|
|
79
|
+
await html2docx(URL, HTML, {
|
|
80
|
+
transformDOM: testParams,
|
|
81
|
+
generateDocumentPath: testParams,
|
|
82
|
+
}, null, {
|
|
83
|
+
originalURL: ORIGNAL_URL,
|
|
84
|
+
});
|
|
85
|
+
});
|
|
86
|
+
|
|
87
|
+
it('parameters are correctly passed in multi mode', async () => {
|
|
88
|
+
await html2md(URL, HTML, {
|
|
89
|
+
transform: testParams,
|
|
90
|
+
}, null, {
|
|
91
|
+
originalURL: ORIGNAL_URL,
|
|
92
|
+
});
|
|
93
|
+
|
|
94
|
+
await html2docx(URL, HTML, {
|
|
95
|
+
transform: testParams,
|
|
96
|
+
}, null, {
|
|
97
|
+
originalURL: ORIGNAL_URL,
|
|
98
|
+
});
|
|
99
|
+
});
|
|
100
|
+
});
|
|
101
|
+
|
|
51
102
|
describe('html2md tests', () => {
|
|
52
103
|
it('html2md provides a default transformation', async () => {
|
|
53
104
|
const out = await html2md('https://www.sample.com/page.html', '<html><body><h1>Hello World</h1></body></html>');
|
|
@@ -56,7 +107,7 @@ describe('html2md tests', () => {
|
|
|
56
107
|
strictEqual(out.path, '/page');
|
|
57
108
|
});
|
|
58
109
|
|
|
59
|
-
it('html2md handles a custom
|
|
110
|
+
it('html2md handles a custom transformation', async () => {
|
|
60
111
|
const out = await html2md('https://www.sample.com/page.html', '<html><body><h1>Hello World</h1></body></html>', {
|
|
61
112
|
transformDOM: ({ document }) => {
|
|
62
113
|
const p = document.createElement('p');
|
|
@@ -70,6 +121,65 @@ describe('html2md tests', () => {
|
|
|
70
121
|
strictEqual(out.path, '/folder/my-custom-path');
|
|
71
122
|
});
|
|
72
123
|
|
|
124
|
+
it('html2md handles multiple transform', async () => {
|
|
125
|
+
const out = await html2md('https://www.sample.com/page.html', '<html><body><h1>Hello World</h1></body></html>', {
|
|
126
|
+
transform: ({ document }) => {
|
|
127
|
+
const p1 = document.createElement('p');
|
|
128
|
+
p1.innerHTML = 'My Hello to the World 1';
|
|
129
|
+
|
|
130
|
+
const p2 = document.createElement('p');
|
|
131
|
+
p2.innerHTML = 'My Hello to the World 2';
|
|
132
|
+
|
|
133
|
+
return [{
|
|
134
|
+
element: p1,
|
|
135
|
+
path: '/my-custom-path-p1',
|
|
136
|
+
}, {
|
|
137
|
+
element: p2,
|
|
138
|
+
path: '/folder/my-custom-path-p2',
|
|
139
|
+
}];
|
|
140
|
+
},
|
|
141
|
+
});
|
|
142
|
+
|
|
143
|
+
const out1 = out[0];
|
|
144
|
+
strictEqual(out1.html.trim(), '<p>My Hello to the World 1</p>');
|
|
145
|
+
strictEqual(out1.md.trim(), 'My Hello to the World 1');
|
|
146
|
+
strictEqual(out1.path, '/my-custom-path-p1');
|
|
147
|
+
|
|
148
|
+
const out2 = out[1];
|
|
149
|
+
strictEqual(out2.html.trim(), '<p>My Hello to the World 2</p>');
|
|
150
|
+
strictEqual(out2.md.trim(), 'My Hello to the World 2');
|
|
151
|
+
strictEqual(out2.path, '/folder/my-custom-path-p2');
|
|
152
|
+
});
|
|
153
|
+
|
|
154
|
+
it('html2md handles multiple transform', async () => {
|
|
155
|
+
const out = await html2md('https://www.sample.com/page.html', '<html><body><h1>Hello World</h1></body></html>', {
|
|
156
|
+
transform: ({ document }) => {
|
|
157
|
+
const p1 = document.createElement('p');
|
|
158
|
+
p1.innerHTML = 'My Hello to the World 1';
|
|
159
|
+
|
|
160
|
+
const p2 = document.createElement('p');
|
|
161
|
+
p2.innerHTML = 'My Hello to the World 2';
|
|
162
|
+
|
|
163
|
+
return {
|
|
164
|
+
element: p1,
|
|
165
|
+
path: '/my-custom-path-p1',
|
|
166
|
+
};
|
|
167
|
+
},
|
|
168
|
+
});
|
|
169
|
+
|
|
170
|
+
strictEqual(out.html.trim(), '<p>My Hello to the World 1</p>');
|
|
171
|
+
strictEqual(out.md.trim(), 'My Hello to the World 1');
|
|
172
|
+
strictEqual(out.path, '/my-custom-path-p1');
|
|
173
|
+
});
|
|
174
|
+
|
|
175
|
+
it('html2md does not crash if transform returns null', async () => {
|
|
176
|
+
const out = await html2md('https://www.sample.com/page.html', '<html><body><h1>Hello World</h1></body></html>', {
|
|
177
|
+
transform: () => null,
|
|
178
|
+
});
|
|
179
|
+
|
|
180
|
+
strictEqual(out.length, 0);
|
|
181
|
+
});
|
|
182
|
+
|
|
73
183
|
it('html2md can deal with null returning transformation', async () => {
|
|
74
184
|
const out = await html2md('https://www.sample.com/page.html', '<html><body><h1>Hello World</h1></body></html>', {
|
|
75
185
|
transformDOM: () => null,
|
|
@@ -76,6 +76,7 @@ describe('PageImporter tests - various options', () => {
|
|
|
76
76
|
const results = await se.import('/someurl');
|
|
77
77
|
|
|
78
78
|
strictEqual(results.length, 1, 'expect no result');
|
|
79
|
+
strictEqual(results[0].path, '/someurl/somecomputedpath/resource1', 'expect no result');
|
|
79
80
|
|
|
80
81
|
ok(await storageHandler.exists('/someurl/somecomputedpath/resource1.md'), 'md has been stored');
|
|
81
82
|
ok(await storageHandler.exists('/someurl/somecomputedpath/resource1.docx'), 'docx has been stored');
|
|
@@ -144,7 +145,7 @@ describe('PageImporter tests - fixtures', () => {
|
|
|
144
145
|
|
|
145
146
|
strictEqual(results.length, 1, 'expect one result');
|
|
146
147
|
|
|
147
|
-
const md = await storageHandler.get(
|
|
148
|
+
const md = await storageHandler.get(results[0].md);
|
|
148
149
|
const expectedMD = await fs.readFile(path.resolve(__dirname, 'fixtures', `${feature}.spec.md`), 'utf-8');
|
|
149
150
|
strictEqual(md.trim(), expectedMD.trim(), 'inported md is expected one');
|
|
150
151
|
};
|
|
@@ -172,4 +173,8 @@ describe('PageImporter tests - fixtures', () => {
|
|
|
172
173
|
it('import - complex', async () => {
|
|
173
174
|
await featureTest('complex');
|
|
174
175
|
});
|
|
176
|
+
|
|
177
|
+
it('import - spaces', async () => {
|
|
178
|
+
await featureTest('space');
|
|
179
|
+
});
|
|
175
180
|
});
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
<html>
|
|
2
|
+
<body>
|
|
3
|
+
<h1>Space sample</h1>
|
|
4
|
+
<p>A simple paragraph</p>
|
|
5
|
+
<p>A paragraph with a br inside.<br> This should be next line.</p>
|
|
6
|
+
<p>A paragraph with a br at the end.<br></p>
|
|
7
|
+
<p>A paragraph followed by a br</p>
|
|
8
|
+
<br>
|
|
9
|
+
<p>A paragraph after the br</p>
|
|
10
|
+
|
|
11
|
+
<p>A paragraph after the nbsp;</p>
|
|
12
|
+
</body>
|
|
13
|
+
</html>
|
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
# Space sample
|
|
2
|
+
|
|
3
|
+
A simple paragraph
|
|
4
|
+
|
|
5
|
+
A paragraph with a br inside.\
|
|
6
|
+
This should be next line.
|
|
7
|
+
|
|
8
|
+
A paragraph with a br at the end.
|
|
9
|
+
|
|
10
|
+
A paragraph followed by a br
|
|
11
|
+
|
|
12
|
+
\
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
A paragraph after the br
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
A paragraph after the nbsp;
|