@adobe/helix-importer 2.9.41 → 3.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +19 -0
- package/README.md +5 -38
- package/package.json +13 -6
- package/src/importer/HTML2x.js +19 -30
- package/src/importer/PageImporter.js +12 -4
- package/src/importer/PageImporterParams.js +2 -0
- package/src/importer/defaults/generateDocumentPath.js +24 -0
- package/src/importer/defaults/rules/adjustImageUrls.js +28 -0
- package/src/importer/defaults/rules/convertIcons.js +25 -0
- package/src/importer/defaults/rules/createMetadata.js +102 -0
- package/src/importer/defaults/rules/transformBackgroundImages.js +21 -0
- package/src/importer/defaults/transformDOM.js +42 -0
- package/src/index.js +13 -11
- package/src/utils/BrowserUtils.js +29 -0
- package/src/utils/DOMUtils.js +19 -13
- package/src/wp/WPUtils.js +1 -3
- package/{src/explorer/PagingExplorerParams.js → test/TestUtils.js} +8 -4
- package/test/browser/BrowserUtils.test.js +42 -0
- package/test/browser/DOMUtils.test.js +67 -0
- package/test/importers/HTML2x.spec.js +122 -38
- package/test/importers/PageImporter.spec.js +37 -2
- package/test/importers/defaults/fixtures/adjust-image-urls.expected.html +7 -0
- package/test/importers/defaults/fixtures/adjust-image-urls.input.html +10 -0
- package/test/importers/defaults/fixtures/background-image.expected.html +13 -0
- package/test/importers/defaults/fixtures/background-image.input.html +10 -0
- package/test/importers/defaults/fixtures/cleanup.expected.html +5 -0
- package/test/importers/defaults/fixtures/cleanup.input.html +11 -0
- package/test/importers/defaults/fixtures/default.expected.html +4 -0
- package/test/importers/defaults/fixtures/default.input.html +6 -0
- package/test/importers/defaults/fixtures/icons.expected.html +4 -0
- package/test/importers/defaults/fixtures/icons.input.html +6 -0
- package/test/importers/defaults/fixtures/metadata.all.diff.expected.html +40 -0
- package/test/importers/defaults/fixtures/metadata.all.diff.input.html +17 -0
- package/test/importers/defaults/fixtures/metadata.all.same.expected.html +20 -0
- package/test/importers/defaults/fixtures/metadata.all.same.input.html +17 -0
- package/test/importers/defaults/fixtures/metadata.basic.expected.html +16 -0
- package/test/importers/defaults/fixtures/metadata.basic.input.html +9 -0
- package/test/importers/defaults/fixtures/metadata.image.expected.html +12 -0
- package/test/importers/defaults/fixtures/metadata.image.input.html +9 -0
- package/test/importers/defaults/fixtures/metadata.og.expected.html +16 -0
- package/test/importers/defaults/fixtures/metadata.og.input.html +9 -0
- package/test/importers/defaults/fixtures/metadata.twitter.expected.html +16 -0
- package/test/importers/defaults/fixtures/metadata.twitter.input.html +9 -0
- package/test/importers/defaults/generateDocumentPath.spec.js +32 -0
- package/test/importers/defaults/transformDOM.spec.js +94 -0
- package/test/importers/fixtures/video.spec.html +11 -0
- package/test/importers/fixtures/video.spec.md +7 -0
- package/test/utils/DOMUtils.spec.js +23 -4
- package/src/explorer/PagingExplorer.js +0 -81
- package/src/wp/explorers/WPAdminAjaxPager.js +0 -51
- package/src/wp/explorers/WPContentPager.js +0 -48
- package/src/wp/explorers/WPPostWrapPager.js +0 -43
- package/test/explorers/PagingExplorer.spec.js +0 -280
|
@@ -1,48 +0,0 @@
|
|
|
1
|
-
/*
|
|
2
|
-
* Copyright 2021 Adobe. All rights reserved.
|
|
3
|
-
* This file is licensed to you under the Apache License, Version 2.0 (the "License");
|
|
4
|
-
* you may not use this file except in compliance with the License. You may obtain a copy
|
|
5
|
-
* of the License at http://www.apache.org/licenses/LICENSE-2.0
|
|
6
|
-
*
|
|
7
|
-
* Unless required by applicable law or agreed to in writing, software distributed under
|
|
8
|
-
* the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR REPRESENTATIONS
|
|
9
|
-
* OF ANY KIND, either express or implied. See the License for the specific language
|
|
10
|
-
* governing permissions and limitations under the License.
|
|
11
|
-
*/
|
|
12
|
-
|
|
13
|
-
import fetch from 'node-fetch';
|
|
14
|
-
import PagingExplorer from '../../explorer/PagingExplorer.js';
|
|
15
|
-
|
|
16
|
-
// Path segment inserted between the configured base URL and the page identifier.
const API = 'page/';

/**
 * Paging explorer that requests `<params.url>page/<page>` and extracts the
 * link and date of every entry found in the returned document.
 */
export default class WPContentPager extends PagingExplorer {
  /**
   * Requests one listing page.
   *
   * @param {*} page Page identifier appended to `<params.url>page/`
   *   (presumably a page number - confirm against PagingExplorer).
   * @returns {Promise} Whatever the imported `fetch` resolves to for that URL.
   */
  async fetch(page) {
    return fetch(`${this.params.url}${API}${page}`);
  }

  /**
   * Extracts the entries of one listing page.
   *
   * @param {Document} document Document to scan for entries.
   * @param {Array<{url: string}>} all Entries already collected; an entry
   *   whose url is already present in this list is not returned again.
   * @returns {Array<{date: string, url: string}>} Entries found on this page.
   */
  // eslint-disable-next-line class-methods-use-this
  process(document, all) {
    const selector = 'main .content .entry, main .entries .entry, article .entries .entry';
    const found = [];

    document.querySelectorAll(selector).forEach((node) => {
      const anchor = node.querySelector('h2 a');
      if (!anchor) {
        // Entries without a heading link are ignored.
        return;
      }

      const url = anchor.getAttribute('href');

      // Prefer the dedicated date element, fall back to the entry footer.
      const dateNode = node.querySelector('.date') || node.querySelector('.entry_footer');
      const date = dateNode ? dateNode.textContent.trim() : '';

      const alreadyListed = all.findIndex((known) => known.url === url) !== -1;
      if (alreadyListed) {
        return;
      }

      found.push({ date, url });
    });

    return found;
  }
}
|
|
@@ -1,43 +0,0 @@
|
|
|
1
|
-
/*
|
|
2
|
-
* Copyright 2021 Adobe. All rights reserved.
|
|
3
|
-
* This file is licensed to you under the Apache License, Version 2.0 (the "License");
|
|
4
|
-
* you may not use this file except in compliance with the License. You may obtain a copy
|
|
5
|
-
* of the License at http://www.apache.org/licenses/LICENSE-2.0
|
|
6
|
-
*
|
|
7
|
-
* Unless required by applicable law or agreed to in writing, software distributed under
|
|
8
|
-
* the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR REPRESENTATIONS
|
|
9
|
-
* OF ANY KIND, either express or implied. See the License for the specific language
|
|
10
|
-
* governing permissions and limitations under the License.
|
|
11
|
-
*/
|
|
12
|
-
|
|
13
|
-
import fetch from 'node-fetch';
|
|
14
|
-
import PagingExplorer from '../../explorer/PagingExplorer.js';
|
|
15
|
-
|
|
16
|
-
// Path segment inserted between the configured base URL and the page identifier.
const API = 'page/';

/**
 * Paging explorer that requests `<params.url>page/<page>` and extracts the
 * link and date of every `.post-meta-wrap` element in the returned document.
 */
export default class WPPostWrapPager extends PagingExplorer {
  /**
   * Requests one listing page.
   *
   * @param {*} page Page identifier appended to `<params.url>page/`
   *   (presumably a page number - confirm against PagingExplorer).
   * @returns {Promise} Whatever the imported `fetch` resolves to for that URL.
   */
  async fetch(page) {
    const api = `${this.params.url}${API}${page}`;
    return fetch(api);
  }

  /**
   * Extracts the entries of one listing page.
   *
   * Wrappers without a `.post-item > a` link are skipped and a missing
   * `.post-date` yields an empty date, so one incomplete wrapper no longer
   * aborts the whole page with a TypeError.
   *
   * @param {Document} document Document to scan for entries.
   * @param {Array<{url: string}>} all Entries already collected; an entry
   *   whose url is already present in this list is not returned again.
   * @returns {Array<{date: string, url: string}>} Entries found on this page.
   */
  // eslint-disable-next-line class-methods-use-this
  process(document, all) {
    const entries = [];
    document.querySelectorAll('.post-meta-wrap').forEach((el) => {
      const link = el.querySelector('.post-item > a');
      if (!link) {
        // querySelector returns null when nothing matches: no link, no entry.
        return;
      }
      const url = link.getAttribute('href');

      const entryDate = el.querySelector('.post-date');
      const date = entryDate ? entryDate.textContent.trim() : '';

      if (all.findIndex((entry) => entry.url === url) === -1) {
        entries.push({
          date,
          url,
        });
      }
    });
    return entries;
  }
}
|
|
@@ -1,280 +0,0 @@
|
|
|
1
|
-
/*
|
|
2
|
-
* Copyright 2020 Adobe. All rights reserved.
|
|
3
|
-
* This file is licensed to you under the Apache License, Version 2.0 (the "License");
|
|
4
|
-
* you may not use this file except in compliance with the License. You may obtain a copy
|
|
5
|
-
* of the License at http://www.apache.org/licenses/LICENSE-2.0
|
|
6
|
-
*
|
|
7
|
-
* Unless required by applicable law or agreed to in writing, software distributed under
|
|
8
|
-
* the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR REPRESENTATIONS
|
|
9
|
-
* OF ANY KIND, either express or implied. See the License for the specific language
|
|
10
|
-
* governing permissions and limitations under the License.
|
|
11
|
-
*/
|
|
12
|
-
|
|
13
|
-
/* eslint-disable max-classes-per-file, class-methods-use-this */
|
|
14
|
-
|
|
15
|
-
import { deepStrictEqual, strictEqual } from 'assert';
|
|
16
|
-
import { describe, it } from 'mocha';
|
|
17
|
-
|
|
18
|
-
import { Response } from 'node-fetch';
|
|
19
|
-
|
|
20
|
-
import PagingExplorer from '../../src/explorer/PagingExplorer.js';
|
|
21
|
-
|
|
22
|
-
describe('PagingExplorer tests', () => {
  // Shared explorer configuration used by every test below.
  const params = {
    url: 'testdest',
    nbMaxPages: 3,
  };

  /**
   * Builds the HTML served for page `n`: three links a<n>, b<n> and c<n>.
   */
  const pageWithLinks = (n) => `<html>
    <body>
      <a href="a${n}.html">a${n}</a>
      <a href="b${n}.html">b${n}</a>
      <a href="c${n}.html">c${n}</a>
    </body>
  </html>`;

  /**
   * Returns one `{ link }` entry per anchor of the document handed to `process`.
   */
  const collectLinks = (document) => {
    const entries = [];
    document.querySelectorAll('a').forEach((el) => {
      entries.push({
        link: el.getAttribute('href'),
      });
    });
    return entries;
  };

  it('fetch and process are called 3 times if end not reached', async () => {
    let fetchCalled = 0;
    let processCalled = 0;

    class Test extends PagingExplorer {
      async fetch() {
        fetchCalled += 1;
        return new Response('test');
      }

      process() {
        processCalled += 1;
        return [{
          a: 1,
        }];
      }
    }

    const se = new Test(params);
    await se.explore();

    strictEqual(fetchCalled, 3, 'fetch is called 3 times');
    strictEqual(processCalled, 3, 'process is called 3 times');
  });

  it('fetch stops the process when reaches the end', async () => {
    let fetchCalled = 0;
    let processCalled = 0;

    class Test extends PagingExplorer {
      async fetch() {
        fetchCalled += 1;
        // Every request after the first one answers 404.
        if (fetchCalled > 1) {
          return new Response('reached the end', { status: 404 });
        }
        return new Response('test');
      }

      process() {
        processCalled += 1;
        return [{
          a: 1,
        }];
      }
    }

    const se = new Test(params);
    await se.explore();

    strictEqual(fetchCalled, 2, 'fetch is called 2 times');
    strictEqual(processCalled, 1, 'process is called 1 time');
  });

  it('explore returns the expected result set', async () => {
    let processCalled = 0;

    class Test extends PagingExplorer {
      async fetch() {
        return new Response('test');
      }

      process() {
        processCalled += 1;
        return [{
          a: processCalled,
        }];
      }
    }

    const se = new Test(params);
    const results = await se.explore();

    deepStrictEqual(results, [{ a: 1 }, { a: 2 }, { a: 3 }], 'result is correct');
  });

  it('explore returns the expected result set when number of pages is not the max one', async () => {
    let fetchCalled = 0;
    let processCalled = 0;

    class Test extends PagingExplorer {
      async fetch() {
        fetchCalled += 1;
        // Only the first two requests succeed.
        if (fetchCalled > 2) {
          return new Response('reached the end', { status: 404 });
        }
        return new Response('test');
      }

      process() {
        processCalled += 1;
        return [{
          a: processCalled,
        }];
      }
    }

    const se = new Test(params);
    const results = await se.explore();

    deepStrictEqual(results, [{ a: 1 }, { a: 2 }], 'result is correct');
  });

  it('explore, fetch and process can be used to retrieve multipage results', async () => {
    let fetchCalled = 0;

    class Test extends PagingExplorer {
      async fetch() {
        fetchCalled += 1;
        // Only the first two requests succeed.
        if (fetchCalled > 2) {
          return new Response('reached the end', { status: 404 });
        }
        return new Response(pageWithLinks(fetchCalled));
      }

      process(document) {
        return collectLinks(document);
      }
    }

    const se = new Test(params);
    const results = await se.explore();

    deepStrictEqual(results, [
      { link: 'a1.html' },
      { link: 'b1.html' },
      { link: 'c1.html' },
      { link: 'a2.html' },
      { link: 'b2.html' },
      { link: 'c2.html' },
    ], 'result is correct');
  });

  it('process receives the entry set from previous pages', async () => {
    let fetchCalled = 0;

    class Test extends PagingExplorer {
      async fetch() {
        fetchCalled += 1;
        return new Response(pageWithLinks(fetchCalled));
      }

      process(document, all) {
        // Expected content of `all`: the links of every page fetched before this one.
        const testResult = [];
        for (let i = 1; i < fetchCalled; i += 1) {
          testResult.push({ link: `a${i}.html` });
          testResult.push({ link: `b${i}.html` });
          testResult.push({ link: `c${i}.html` });
        }
        deepStrictEqual(all, testResult, 'all entries argument contains previous entries from previous pages');

        return collectLinks(document);
      }
    }

    const se = new Test(params);
    await se.explore();
  });

  it('explorer params are honored', async () => {
    const start = 2;
    let callbackCalled = 0;
    let processCalled = 0;

    class Test extends PagingExplorer {
      async fetch() {
        return new Response('test');
      }

      process() {
        processCalled += 1;
        return [{
          a: processCalled + start - 1,
        }];
      }
    }

    const se = new Test(params);
    // Pass `start` itself so the expected values below stay tied to it.
    const results = await se.explore(start, () => {
      callbackCalled += 1;
    });

    strictEqual(callbackCalled, 2, 'callback called twice');
    deepStrictEqual(results, [{ a: 2 }, { a: 3 }], 'result is correct');
  });

  it('no text in response', async () => {
    let processCalled = 0;

    class Test extends PagingExplorer {
      async fetch() {
        // Once two pages have been processed, answer with an empty body.
        if (processCalled < 2) {
          return new Response('test');
        }
        return new Response('');
      }

      process() {
        processCalled += 1;
        return [{
          a: processCalled,
        }];
      }
    }

    const se = new Test(params);
    const results = await se.explore();

    deepStrictEqual(results, [{ a: 1 }, { a: 2 }], 'result is correct');
  });

  it('no entries on page', async () => {
    let processCalled = 0;

    class Test extends PagingExplorer {
      async fetch() {
        return new Response('test');
      }

      process() {
        // From the third call on, report that the page has no entries.
        if (processCalled >= 2) {
          return null;
        }
        processCalled += 1;
        return [{
          a: processCalled,
        }];
      }
    }

    const se = new Test(params);
    const results = await se.explore();

    deepStrictEqual(results, [{ a: 1 }, { a: 2 }], 'result is correct');
  });
});
|