@adobe/helix-importer 2.9.40 → 3.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +19 -0
- package/README.md +5 -38
- package/package.json +17 -10
- package/src/importer/HTML2x.js +17 -9
- package/src/importer/PageImporter.js +12 -4
- package/src/importer/PageImporterParams.js +2 -0
- package/src/index.js +0 -11
- package/src/utils/BrowserUtils.js +29 -0
- package/src/utils/DOMUtils.js +15 -11
- package/src/wp/WPUtils.js +1 -3
- package/test/browser/BrowserUtils.test.js +42 -0
- package/test/importers/HTML2x.spec.js +123 -10
- package/test/importers/PageImporter.spec.js +33 -2
- package/test/utils/DOMUtils.spec.js +20 -0
- package/src/explorer/PagingExplorer.js +0 -81
- package/src/explorer/PagingExplorerParams.js +0 -17
- package/src/wp/explorers/WPAdminAjaxPager.js +0 -51
- package/src/wp/explorers/WPContentPager.js +0 -48
- package/src/wp/explorers/WPPostWrapPager.js +0 -43
- package/test/explorers/PagingExplorer.spec.js +0 -280
|
@@ -10,7 +10,9 @@
|
|
|
10
10
|
* governing permissions and limitations under the License.
|
|
11
11
|
*/
|
|
12
12
|
|
|
13
|
-
import {
|
|
13
|
+
import {
|
|
14
|
+
deepStrictEqual, ok, strictEqual, fail,
|
|
15
|
+
} from 'assert';
|
|
14
16
|
import { describe, it } from 'mocha';
|
|
15
17
|
import { JSDOM } from 'jsdom';
|
|
16
18
|
import { docx2md } from '@adobe/helix-docx2md';
|
|
@@ -24,9 +26,15 @@ import {
|
|
|
24
26
|
defaultTransformDOM,
|
|
25
27
|
} from '../../src/importer/HTML2x.js';
|
|
26
28
|
|
|
29
|
+
// test environment createDocumentFromString version using JSDOM
|
|
30
|
+
const createDocumentFromString = (html) => {
|
|
31
|
+
const { document } = new JSDOM(html, { runScripts: undefined }).window;
|
|
32
|
+
return document;
|
|
33
|
+
};
|
|
34
|
+
|
|
27
35
|
describe('defaultTransformDOM tests', () => {
|
|
28
36
|
it('default transformation', async () => {
|
|
29
|
-
const
|
|
37
|
+
const document = createDocumentFromString('<html><body><h1>Hello World</h1></body></html>');
|
|
30
38
|
const out = await defaultTransformDOM({ document });
|
|
31
39
|
strictEqual(out.outerHTML, '<body><h1>Hello World</h1></body>');
|
|
32
40
|
});
|
|
@@ -73,7 +81,9 @@ describe('html2x parameters', () => {
|
|
|
73
81
|
transformDOM: testParams,
|
|
74
82
|
generateDocumentPath: testParams,
|
|
75
83
|
preprocess: testParams,
|
|
76
|
-
},
|
|
84
|
+
}, {
|
|
85
|
+
createDocumentFromString,
|
|
86
|
+
}, {
|
|
77
87
|
originalURL: ORIGNAL_URL,
|
|
78
88
|
});
|
|
79
89
|
|
|
@@ -81,7 +91,9 @@ describe('html2x parameters', () => {
|
|
|
81
91
|
transformDOM: testParams,
|
|
82
92
|
generateDocumentPath: testParams,
|
|
83
93
|
preprocess: testParams,
|
|
84
|
-
},
|
|
94
|
+
}, {
|
|
95
|
+
createDocumentFromString,
|
|
96
|
+
}, {
|
|
85
97
|
originalURL: ORIGNAL_URL,
|
|
86
98
|
});
|
|
87
99
|
});
|
|
@@ -90,22 +102,94 @@ describe('html2x parameters', () => {
|
|
|
90
102
|
await html2md(URL, HTML, {
|
|
91
103
|
transform: testParams,
|
|
92
104
|
preprocess: testParams,
|
|
93
|
-
},
|
|
105
|
+
}, {
|
|
106
|
+
createDocumentFromString,
|
|
107
|
+
}, {
|
|
94
108
|
originalURL: ORIGNAL_URL,
|
|
95
109
|
});
|
|
96
110
|
|
|
97
111
|
await html2docx(URL, HTML, {
|
|
98
112
|
transform: testParams,
|
|
99
113
|
preprocess: testParams,
|
|
100
|
-
},
|
|
114
|
+
}, {
|
|
115
|
+
createDocumentFromString,
|
|
116
|
+
}, {
|
|
117
|
+
originalURL: ORIGNAL_URL,
|
|
118
|
+
});
|
|
119
|
+
});
|
|
120
|
+
|
|
121
|
+
it('document can be a Document', async () => {
|
|
122
|
+
const doc = createDocumentFromString(HTML);
|
|
123
|
+
await html2md(URL, doc, {
|
|
124
|
+
transformDOM: testParams,
|
|
125
|
+
generateDocumentPath: testParams,
|
|
126
|
+
preprocess: testParams,
|
|
127
|
+
}, {
|
|
128
|
+
createDocumentFromString,
|
|
129
|
+
}, {
|
|
130
|
+
originalURL: ORIGNAL_URL,
|
|
131
|
+
});
|
|
132
|
+
|
|
133
|
+
await html2docx(URL, doc, {
|
|
134
|
+
transformDOM: testParams,
|
|
135
|
+
generateDocumentPath: testParams,
|
|
136
|
+
preprocess: testParams,
|
|
137
|
+
}, {
|
|
138
|
+
createDocumentFromString,
|
|
139
|
+
}, {
|
|
101
140
|
originalURL: ORIGNAL_URL,
|
|
102
141
|
});
|
|
103
142
|
});
|
|
143
|
+
|
|
144
|
+
it('document cannot be a string in the testing context', async () => {
|
|
145
|
+
// we need JSDOM to create a document
|
|
146
|
+
// because importer default implementation relies on DOMParser
|
|
147
|
+
try {
|
|
148
|
+
await html2md(URL, HTML, {
|
|
149
|
+
transformDOM: testParams,
|
|
150
|
+
generateDocumentPath: testParams,
|
|
151
|
+
preprocess: testParams,
|
|
152
|
+
}, {
|
|
153
|
+
createDocumentFromString: null,
|
|
154
|
+
}, {
|
|
155
|
+
originalURL: ORIGNAL_URL,
|
|
156
|
+
});
|
|
157
|
+
fail('should have thrown an error: default createDocumentFromString works only in browser context');
|
|
158
|
+
} catch (e) {
|
|
159
|
+
ok(true);
|
|
160
|
+
}
|
|
161
|
+
|
|
162
|
+
try {
|
|
163
|
+
await html2docx(URL, HTML, {
|
|
164
|
+
transformDOM: testParams,
|
|
165
|
+
generateDocumentPath: testParams,
|
|
166
|
+
preprocess: testParams,
|
|
167
|
+
}, {
|
|
168
|
+
createDocumentFromString: null,
|
|
169
|
+
}, {
|
|
170
|
+
originalURL: ORIGNAL_URL,
|
|
171
|
+
});
|
|
172
|
+
fail('should have thrown an error: default createDocumentFromString works only in browser context');
|
|
173
|
+
} catch (e) {
|
|
174
|
+
ok(true);
|
|
175
|
+
}
|
|
176
|
+
});
|
|
104
177
|
});
|
|
105
178
|
|
|
106
179
|
describe('html2md tests', () => {
|
|
107
180
|
it('html2md provides a default transformation', async () => {
|
|
108
|
-
const out = await html2md('https://www.sample.com/page.html', '<html><body><h1>Hello World</h1></body></html>'
|
|
181
|
+
const out = await html2md('https://www.sample.com/page.html', '<html><body><h1>Hello World</h1></body></html>', null, {
|
|
182
|
+
createDocumentFromString,
|
|
183
|
+
});
|
|
184
|
+
strictEqual(out.html.trim(), '<body><h1>Hello World</h1></body>');
|
|
185
|
+
strictEqual(out.md.trim(), '# Hello World');
|
|
186
|
+
strictEqual(out.path, '/page');
|
|
187
|
+
});
|
|
188
|
+
|
|
189
|
+
it('html2md accepts a string', async () => {
|
|
190
|
+
const out = await html2md('https://www.sample.com/page.html', '<html><body><h1>Hello World</h1></body></html>', null, {
|
|
191
|
+
createDocumentFromString,
|
|
192
|
+
});
|
|
109
193
|
strictEqual(out.html.trim(), '<body><h1>Hello World</h1></body>');
|
|
110
194
|
strictEqual(out.md.trim(), '# Hello World');
|
|
111
195
|
strictEqual(out.path, '/page');
|
|
@@ -119,6 +203,8 @@ describe('html2md tests', () => {
|
|
|
119
203
|
return p;
|
|
120
204
|
},
|
|
121
205
|
generateDocumentPath: () => '/folder/my-custom-path',
|
|
206
|
+
}, {
|
|
207
|
+
createDocumentFromString,
|
|
122
208
|
});
|
|
123
209
|
strictEqual(out.html.trim(), '<p>My Hello to the World</p>');
|
|
124
210
|
strictEqual(out.md.trim(), 'My Hello to the World');
|
|
@@ -142,6 +228,8 @@ describe('html2md tests', () => {
|
|
|
142
228
|
path: '/folder/my-custom-path-p2',
|
|
143
229
|
}];
|
|
144
230
|
},
|
|
231
|
+
}, {
|
|
232
|
+
createDocumentFromString,
|
|
145
233
|
});
|
|
146
234
|
|
|
147
235
|
const out1 = out[0];
|
|
@@ -169,6 +257,8 @@ describe('html2md tests', () => {
|
|
|
169
257
|
path: '/my-custom-path-p1',
|
|
170
258
|
};
|
|
171
259
|
},
|
|
260
|
+
}, {
|
|
261
|
+
createDocumentFromString,
|
|
172
262
|
});
|
|
173
263
|
|
|
174
264
|
strictEqual(out.html.trim(), '<p>My Hello to the World 1</p>');
|
|
@@ -209,6 +299,8 @@ describe('html2md tests', () => {
|
|
|
209
299
|
},
|
|
210
300
|
}];
|
|
211
301
|
},
|
|
302
|
+
}, {
|
|
303
|
+
createDocumentFromString,
|
|
212
304
|
});
|
|
213
305
|
|
|
214
306
|
const out1 = out[0];
|
|
@@ -247,6 +339,8 @@ describe('html2md tests', () => {
|
|
|
247
339
|
},
|
|
248
340
|
},
|
|
249
341
|
}],
|
|
342
|
+
}, {
|
|
343
|
+
createDocumentFromString,
|
|
250
344
|
});
|
|
251
345
|
|
|
252
346
|
// if no element provided, no creation of html, md or docx
|
|
@@ -264,6 +358,8 @@ describe('html2md tests', () => {
|
|
|
264
358
|
it('html2md does not crash if transform returns null', async () => {
|
|
265
359
|
const out = await html2md('https://www.sample.com/page.html', '<html><body><h1>Hello World</h1></body></html>', {
|
|
266
360
|
transform: () => null,
|
|
361
|
+
}, {
|
|
362
|
+
createDocumentFromString,
|
|
267
363
|
});
|
|
268
364
|
|
|
269
365
|
strictEqual(out.length, 0);
|
|
@@ -273,6 +369,8 @@ describe('html2md tests', () => {
|
|
|
273
369
|
const out = await html2md('https://www.sample.com/page.html', '<html><body><h1>Hello World</h1></body></html>', {
|
|
274
370
|
transformDOM: () => null,
|
|
275
371
|
generateDocumentPath: () => null,
|
|
372
|
+
}, {
|
|
373
|
+
createDocumentFromString,
|
|
276
374
|
});
|
|
277
375
|
strictEqual(out.html.trim(), '<body><h1>Hello World</h1></body>');
|
|
278
376
|
strictEqual(out.md.trim(), '# Hello World');
|
|
@@ -291,18 +389,25 @@ describe('html2md tests', () => {
|
|
|
291
389
|
return document.body;
|
|
292
390
|
},
|
|
293
391
|
},
|
|
392
|
+
{
|
|
393
|
+
createDocumentFromString,
|
|
394
|
+
},
|
|
294
395
|
);
|
|
295
396
|
strictEqual(out.html.trim(), '<body><img src="./image.png"></body>');
|
|
296
397
|
});
|
|
297
398
|
|
|
298
399
|
it('html2md removes images with src attributes', async () => {
|
|
299
|
-
const out = await html2md('https://www.sample.com/page.html', '<html><body><img src="data:abc"></body></html>'
|
|
400
|
+
const out = await html2md('https://www.sample.com/page.html', '<html><body><img src="data:abc"></body></html>', null, {
|
|
401
|
+
createDocumentFromString,
|
|
402
|
+
});
|
|
300
403
|
strictEqual(out.html.trim(), '<body></body>');
|
|
301
404
|
strictEqual(out.md.trim(), '');
|
|
302
405
|
});
|
|
303
406
|
|
|
304
407
|
it('html2md set image src with data-src attribute value', async () => {
|
|
305
|
-
const out = await html2md('https://www.sample.com/page.html', '<html><body><img src="data:abc" data-src="./image.jpg"></body></html>'
|
|
408
|
+
const out = await html2md('https://www.sample.com/page.html', '<html><body><img src="data:abc" data-src="./image.jpg"></body></html>', null, {
|
|
409
|
+
createDocumentFromString,
|
|
410
|
+
});
|
|
306
411
|
strictEqual(out.html.trim(), '<body><img src="./image.jpg" data-src="./image.jpg"></body>');
|
|
307
412
|
strictEqual(out.md.trim(), '![][image0]\n\n[image0]: ./image.jpg');
|
|
308
413
|
});
|
|
@@ -314,6 +419,8 @@ describe('html2md tests', () => {
|
|
|
314
419
|
img.setAttribute('src', img.getAttribute('data-fancy-src'));
|
|
315
420
|
img.removeAttribute('data-fancy-src');
|
|
316
421
|
},
|
|
422
|
+
}, {
|
|
423
|
+
createDocumentFromString,
|
|
317
424
|
});
|
|
318
425
|
strictEqual(out.html.trim(), '<body><img src="./image.jpg"></body>');
|
|
319
426
|
strictEqual(out.md.trim(), '![][image0]\n\n[image0]: ./image.jpg');
|
|
@@ -330,6 +437,8 @@ describe('html2md tests', () => {
|
|
|
330
437
|
p.after(hr);
|
|
331
438
|
return document.body;
|
|
332
439
|
},
|
|
440
|
+
}, {
|
|
441
|
+
createDocumentFromString,
|
|
333
442
|
});
|
|
334
443
|
strictEqual(out.html.trim(), '<body><p>text 1</p><p>text 2</p><p>text 3</p><p>text 4</p><hr></body>');
|
|
335
444
|
strictEqual(out.md.trim(), 'text 1\n\ntext 2\n\ntext 3\n\ntext 4\n\n---');
|
|
@@ -338,7 +447,9 @@ describe('html2md tests', () => {
|
|
|
338
447
|
|
|
339
448
|
describe('html2docx tests', () => {
|
|
340
449
|
it('html2docx provides a default transformation', async () => {
|
|
341
|
-
const out = await html2docx('https://www.sample.com/page.html', '<html><body><h1>Hello World</h1></body></html>'
|
|
450
|
+
const out = await html2docx('https://www.sample.com/page.html', '<html><body><h1>Hello World</h1></body></html>', null, {
|
|
451
|
+
createDocumentFromString,
|
|
452
|
+
});
|
|
342
453
|
strictEqual(out.html.trim(), '<body><h1>Hello World</h1></body>');
|
|
343
454
|
strictEqual(out.md.trim(), '# Hello World');
|
|
344
455
|
strictEqual(out.path, '/page');
|
|
@@ -357,6 +468,8 @@ describe('html2docx tests', () => {
|
|
|
357
468
|
return p;
|
|
358
469
|
},
|
|
359
470
|
generateDocumentPath: () => '/folder1/folder2/my-custom-path',
|
|
471
|
+
}, {
|
|
472
|
+
createDocumentFromString,
|
|
360
473
|
});
|
|
361
474
|
strictEqual(out.html.trim(), '<p>My Hello to the World</p>');
|
|
362
475
|
strictEqual(out.md.trim(), 'My Hello to the World');
|
|
@@ -14,13 +14,15 @@
|
|
|
14
14
|
|
|
15
15
|
import path from 'path';
|
|
16
16
|
import fs from 'fs-extra';
|
|
17
|
-
import { strictEqual, ok } from 'assert';
|
|
17
|
+
import { strictEqual, ok, fail } from 'assert';
|
|
18
18
|
import { describe, it } from 'mocha';
|
|
19
19
|
import { Response } from 'node-fetch';
|
|
20
20
|
import { dirname } from 'dirname-filename-esm';
|
|
21
21
|
|
|
22
22
|
import { docx2md } from '@adobe/helix-docx2md';
|
|
23
23
|
|
|
24
|
+
import { JSDOM } from 'jsdom';
|
|
25
|
+
|
|
24
26
|
import { unified } from 'unified';
|
|
25
27
|
import remarkParse from 'remark-parse';
|
|
26
28
|
import remarkGridTable from '@adobe/remark-gridtables';
|
|
@@ -38,6 +40,12 @@ const __dirname = dirname(import.meta);
|
|
|
38
40
|
|
|
39
41
|
const logger = new NoopLogger();
|
|
40
42
|
|
|
43
|
+
// test environment createDocumentFromString version using JSDOM
|
|
44
|
+
const createDocumentFromString = (html) => {
|
|
45
|
+
const { document } = new JSDOM(html, { runScripts: undefined }).window;
|
|
46
|
+
return document;
|
|
47
|
+
};
|
|
48
|
+
|
|
41
49
|
describe('PageImporter tests', () => {
|
|
42
50
|
const storageHandler = new MemoryHandler(logger);
|
|
43
51
|
const config = {
|
|
@@ -52,11 +60,31 @@ describe('PageImporter tests', () => {
|
|
|
52
60
|
}
|
|
53
61
|
}
|
|
54
62
|
|
|
55
|
-
const se = new TestImporter(
|
|
63
|
+
const se = new TestImporter({
|
|
64
|
+
createDocumentFromString,
|
|
65
|
+
...config,
|
|
66
|
+
});
|
|
56
67
|
const results = await se.import('someurl');
|
|
57
68
|
|
|
58
69
|
strictEqual(results.length, 0, 'expect no result');
|
|
59
70
|
});
|
|
71
|
+
|
|
72
|
+
it('import - not providing createDocumentFromString should fail in the test enviroment only', async () => {
|
|
73
|
+
class TestImporter extends PageImporter {
|
|
74
|
+
async fetch() {
|
|
75
|
+
return new Response('test');
|
|
76
|
+
}
|
|
77
|
+
}
|
|
78
|
+
|
|
79
|
+
const se = new TestImporter(config);
|
|
80
|
+
|
|
81
|
+
try {
|
|
82
|
+
await se.import('someurl');
|
|
83
|
+
fail('should have thrown an error: default createDocumentFromString works only in browser context');
|
|
84
|
+
} catch (e) {
|
|
85
|
+
ok(true);
|
|
86
|
+
}
|
|
87
|
+
});
|
|
60
88
|
});
|
|
61
89
|
|
|
62
90
|
describe('PageImporter tests - various options', () => {
|
|
@@ -76,6 +104,7 @@ describe('PageImporter tests - various options', () => {
|
|
|
76
104
|
const config = {
|
|
77
105
|
storageHandler,
|
|
78
106
|
logger,
|
|
107
|
+
createDocumentFromString,
|
|
79
108
|
};
|
|
80
109
|
const se = new Test(config);
|
|
81
110
|
const results = await se.import('/someurl');
|
|
@@ -105,6 +134,7 @@ describe('PageImporter tests - various options', () => {
|
|
|
105
134
|
mdast2Docx2Options: {
|
|
106
135
|
stylesXML,
|
|
107
136
|
},
|
|
137
|
+
createDocumentFromString,
|
|
108
138
|
};
|
|
109
139
|
const se = new Test(config);
|
|
110
140
|
const results = await se.import('/someurl');
|
|
@@ -144,6 +174,7 @@ describe('PageImporter tests - fixtures', () => {
|
|
|
144
174
|
storageHandler,
|
|
145
175
|
skipDocxConversion: true,
|
|
146
176
|
logger,
|
|
177
|
+
createDocumentFromString,
|
|
147
178
|
};
|
|
148
179
|
const se = new Test(config);
|
|
149
180
|
const results = await se.import(`https://www.sample.com/${feature}`);
|
|
@@ -19,6 +19,26 @@ import { JSDOM } from 'jsdom';
|
|
|
19
19
|
|
|
20
20
|
import DOMUtils from '../../src/utils/DOMUtils.js';
|
|
21
21
|
|
|
22
|
+
describe('DOMUtils#fragment tests', () => {
|
|
23
|
+
const test = (input) => {
|
|
24
|
+
const { document } = (new JSDOM()).window;
|
|
25
|
+
const output = DOMUtils.fragment(document, input);
|
|
26
|
+
const div = document.createElement('div');
|
|
27
|
+
div.append(output);
|
|
28
|
+
const expected = document.createElement('div');
|
|
29
|
+
expected.innerHTML = input;
|
|
30
|
+
strictEqual(div.outerHTML, expected.outerHTML);
|
|
31
|
+
};
|
|
32
|
+
|
|
33
|
+
it('can create fragments from string', () => {
|
|
34
|
+
test('some text');
|
|
35
|
+
test(' some text with spaces ');
|
|
36
|
+
test('<a href="linkhref">linkcontent</a>');
|
|
37
|
+
test('<p><em>Caption Text</em></p>');
|
|
38
|
+
test('some text: <a href="linkhref">linkcontent</a> <a href="linkhref">another link</a>');
|
|
39
|
+
});
|
|
40
|
+
});
|
|
41
|
+
|
|
22
42
|
describe('DOMUtils#reviewInlineElement tests', () => {
|
|
23
43
|
const test = (input, tag, expected) => {
|
|
24
44
|
const { document } = (new JSDOM(input)).window;
|
|
@@ -1,81 +0,0 @@
|
|
|
1
|
-
/*
|
|
2
|
-
* Copyright 2021 Adobe. All rights reserved.
|
|
3
|
-
* This file is licensed to you under the Apache License, Version 2.0 (the "License");
|
|
4
|
-
* you may not use this file except in compliance with the License. You may obtain a copy
|
|
5
|
-
* of the License at http://www.apache.org/licenses/LICENSE-2.0
|
|
6
|
-
*
|
|
7
|
-
* Unless required by applicable law or agreed to in writing, software distributed under
|
|
8
|
-
* the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR REPRESENTATIONS
|
|
9
|
-
* OF ANY KIND, either express or implied. See the License for the specific language
|
|
10
|
-
* governing permissions and limitations under the License.
|
|
11
|
-
*/
|
|
12
|
-
/* eslint-disable class-methods-use-this, no-console */
|
|
13
|
-
|
|
14
|
-
import { JSDOM } from 'jsdom';
|
|
15
|
-
|
|
16
|
-
export default class PagingExplorer {
|
|
17
|
-
params;
|
|
18
|
-
|
|
19
|
-
constructor(params) {
|
|
20
|
-
this.params = params;
|
|
21
|
-
}
|
|
22
|
-
|
|
23
|
-
async explore(
|
|
24
|
-
// eslint-disable-next-line default-param-last
|
|
25
|
-
page = 1,
|
|
26
|
-
pageCallback,
|
|
27
|
-
) {
|
|
28
|
-
const startTime = new Date().getTime();
|
|
29
|
-
|
|
30
|
-
let results = [];
|
|
31
|
-
|
|
32
|
-
while (page <= this.params.nbMaxPages) {
|
|
33
|
-
console.log(`${this.params.url}: Requesting page ${page}/${this.params.nbMaxPages}.`);
|
|
34
|
-
|
|
35
|
-
// eslint-disable-next-line no-await-in-loop
|
|
36
|
-
const res = await this.fetch(page);
|
|
37
|
-
|
|
38
|
-
if (!res.ok) {
|
|
39
|
-
console.log(`${this.params.url}: Invalid response, considering no more results`);
|
|
40
|
-
break;
|
|
41
|
-
} else {
|
|
42
|
-
// eslint-disable-next-line no-await-in-loop
|
|
43
|
-
const text = await res.text();
|
|
44
|
-
|
|
45
|
-
if (text) {
|
|
46
|
-
const { document } = new JSDOM(text).window;
|
|
47
|
-
|
|
48
|
-
const entries = this.process(document, results);
|
|
49
|
-
|
|
50
|
-
if (entries && entries.length > 0) {
|
|
51
|
-
results = results.concat(entries);
|
|
52
|
-
if (pageCallback) {
|
|
53
|
-
// eslint-disable-next-line no-await-in-loop
|
|
54
|
-
await pageCallback(entries, page, results);
|
|
55
|
-
}
|
|
56
|
-
} else {
|
|
57
|
-
console.log(`${this.params.url}: No entries found on page ${page}`);
|
|
58
|
-
break;
|
|
59
|
-
}
|
|
60
|
-
} else {
|
|
61
|
-
console.log(`${this.params.url}: No more results`);
|
|
62
|
-
break;
|
|
63
|
-
}
|
|
64
|
-
|
|
65
|
-
// eslint-disable-next-line no-param-reassign
|
|
66
|
-
page += 1;
|
|
67
|
-
}
|
|
68
|
-
}
|
|
69
|
-
|
|
70
|
-
console.log();
|
|
71
|
-
console.log(`${this.params.url}: Process stopped at page ${page - 1} on ${this.params.nbMaxPages}.`);
|
|
72
|
-
console.log(`${this.params.url}: Imported ${results.length} post entries.`);
|
|
73
|
-
console.log(`${this.params.url}: Process took ${(new Date().getTime() - startTime) / 1000}s.`);
|
|
74
|
-
|
|
75
|
-
return results;
|
|
76
|
-
}
|
|
77
|
-
|
|
78
|
-
fetch() {}
|
|
79
|
-
|
|
80
|
-
process() {}
|
|
81
|
-
}
|
|
@@ -1,17 +0,0 @@
|
|
|
1
|
-
/*
|
|
2
|
-
* Copyright 2021 Adobe. All rights reserved.
|
|
3
|
-
* This file is licensed to you under the Apache License, Version 2.0 (the "License");
|
|
4
|
-
* you may not use this file except in compliance with the License. You may obtain a copy
|
|
5
|
-
* of the License at http://www.apache.org/licenses/LICENSE-2.0
|
|
6
|
-
*
|
|
7
|
-
* Unless required by applicable law or agreed to in writing, software distributed under
|
|
8
|
-
* the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR REPRESENTATIONS
|
|
9
|
-
* OF ANY KIND, either express or implied. See the License for the specific language
|
|
10
|
-
* governing permissions and limitations under the License.
|
|
11
|
-
*/
|
|
12
|
-
|
|
13
|
-
export default class PagingExplorerParams {
|
|
14
|
-
nbMaxPages;
|
|
15
|
-
|
|
16
|
-
url;
|
|
17
|
-
}
|
|
@@ -1,51 +0,0 @@
|
|
|
1
|
-
/*
|
|
2
|
-
* Copyright 2021 Adobe. All rights reserved.
|
|
3
|
-
* This file is licensed to you under the Apache License, Version 2.0 (the "License");
|
|
4
|
-
* you may not use this file except in compliance with the License. You may obtain a copy
|
|
5
|
-
* of the License at http://www.apache.org/licenses/LICENSE-2.0
|
|
6
|
-
*
|
|
7
|
-
* Unless required by applicable law or agreed to in writing, software distributed under
|
|
8
|
-
* the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR REPRESENTATIONS
|
|
9
|
-
* OF ANY KIND, either express or implied. See the License for the specific language
|
|
10
|
-
* governing permissions and limitations under the License.
|
|
11
|
-
*/
|
|
12
|
-
|
|
13
|
-
import FormData from 'form-data';
|
|
14
|
-
import fetch from 'node-fetch';
|
|
15
|
-
import PagingExplorer from '../../explorer/PagingExplorer.js';
|
|
16
|
-
|
|
17
|
-
const API = 'wp-admin/admin-ajax.php';
|
|
18
|
-
|
|
19
|
-
export default class WPAdminAjaxPager extends PagingExplorer {
|
|
20
|
-
async fetch(page) {
|
|
21
|
-
const api = `${this.params.url}${API}`;
|
|
22
|
-
const form = new FormData();
|
|
23
|
-
form.append('action', 'cardsFilter');
|
|
24
|
-
form.append('filterBy', 'latest');
|
|
25
|
-
form.append('paged', `${page}`);
|
|
26
|
-
return fetch(api, {
|
|
27
|
-
method: 'POST',
|
|
28
|
-
body: form,
|
|
29
|
-
});
|
|
30
|
-
}
|
|
31
|
-
|
|
32
|
-
// eslint-disable-next-line class-methods-use-this
|
|
33
|
-
process(document, all) {
|
|
34
|
-
const entries = [];
|
|
35
|
-
document.querySelectorAll('.card-item').forEach((el) => {
|
|
36
|
-
const link = el.querySelector('h4 a');
|
|
37
|
-
const url = link.getAttribute('href');
|
|
38
|
-
|
|
39
|
-
const entryDate = el.querySelector('.date');
|
|
40
|
-
const date = entryDate.textContent.trim();
|
|
41
|
-
|
|
42
|
-
if (all.findIndex((entry) => entry.url === url) === -1) {
|
|
43
|
-
entries.push({
|
|
44
|
-
date,
|
|
45
|
-
url,
|
|
46
|
-
});
|
|
47
|
-
}
|
|
48
|
-
});
|
|
49
|
-
return entries;
|
|
50
|
-
}
|
|
51
|
-
}
|
|
@@ -1,48 +0,0 @@
|
|
|
1
|
-
/*
|
|
2
|
-
* Copyright 2021 Adobe. All rights reserved.
|
|
3
|
-
* This file is licensed to you under the Apache License, Version 2.0 (the "License");
|
|
4
|
-
* you may not use this file except in compliance with the License. You may obtain a copy
|
|
5
|
-
* of the License at http://www.apache.org/licenses/LICENSE-2.0
|
|
6
|
-
*
|
|
7
|
-
* Unless required by applicable law or agreed to in writing, software distributed under
|
|
8
|
-
* the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR REPRESENTATIONS
|
|
9
|
-
* OF ANY KIND, either express or implied. See the License for the specific language
|
|
10
|
-
* governing permissions and limitations under the License.
|
|
11
|
-
*/
|
|
12
|
-
|
|
13
|
-
import fetch from 'node-fetch';
|
|
14
|
-
import PagingExplorer from '../../explorer/PagingExplorer.js';
|
|
15
|
-
|
|
16
|
-
const API = 'page/';
|
|
17
|
-
|
|
18
|
-
export default class WPContentPager extends PagingExplorer {
|
|
19
|
-
async fetch(page) {
|
|
20
|
-
const api = `${this.params.url}${API}${page}`;
|
|
21
|
-
return fetch(api);
|
|
22
|
-
}
|
|
23
|
-
|
|
24
|
-
// eslint-disable-next-line class-methods-use-this
|
|
25
|
-
process(document, all) {
|
|
26
|
-
const entries = [];
|
|
27
|
-
document.querySelectorAll('main .content .entry, main .entries .entry, article .entries .entry').forEach((el) => {
|
|
28
|
-
const link = el.querySelector('h2 a');
|
|
29
|
-
if (link) {
|
|
30
|
-
const url = link.getAttribute('href');
|
|
31
|
-
|
|
32
|
-
const entryDate = el.querySelector('.date') || el.querySelector('.entry_footer');
|
|
33
|
-
let date = '';
|
|
34
|
-
if (entryDate) {
|
|
35
|
-
date = entryDate.textContent.trim();
|
|
36
|
-
}
|
|
37
|
-
|
|
38
|
-
if (all.findIndex((entry) => entry.url === url) === -1) {
|
|
39
|
-
entries.push({
|
|
40
|
-
date,
|
|
41
|
-
url,
|
|
42
|
-
});
|
|
43
|
-
}
|
|
44
|
-
}
|
|
45
|
-
});
|
|
46
|
-
return entries;
|
|
47
|
-
}
|
|
48
|
-
}
|
|
@@ -1,43 +0,0 @@
|
|
|
1
|
-
/*
|
|
2
|
-
* Copyright 2021 Adobe. All rights reserved.
|
|
3
|
-
* This file is licensed to you under the Apache License, Version 2.0 (the "License");
|
|
4
|
-
* you may not use this file except in compliance with the License. You may obtain a copy
|
|
5
|
-
* of the License at http://www.apache.org/licenses/LICENSE-2.0
|
|
6
|
-
*
|
|
7
|
-
* Unless required by applicable law or agreed to in writing, software distributed under
|
|
8
|
-
* the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR REPRESENTATIONS
|
|
9
|
-
* OF ANY KIND, either express or implied. See the License for the specific language
|
|
10
|
-
* governing permissions and limitations under the License.
|
|
11
|
-
*/
|
|
12
|
-
|
|
13
|
-
import fetch from 'node-fetch';
|
|
14
|
-
import PagingExplorer from '../../explorer/PagingExplorer.js';
|
|
15
|
-
|
|
16
|
-
const API = 'page/';
|
|
17
|
-
|
|
18
|
-
export default class WPPostWrapPager extends PagingExplorer {
|
|
19
|
-
async fetch(page) {
|
|
20
|
-
const api = `${this.params.url}${API}${page}`;
|
|
21
|
-
return fetch(api);
|
|
22
|
-
}
|
|
23
|
-
|
|
24
|
-
// eslint-disable-next-line class-methods-use-this
|
|
25
|
-
process(document, all) {
|
|
26
|
-
const entries = [];
|
|
27
|
-
document.querySelectorAll('.post-meta-wrap').forEach((el) => {
|
|
28
|
-
const link = el.querySelector('.post-item > a');
|
|
29
|
-
const url = link.getAttribute('href');
|
|
30
|
-
|
|
31
|
-
const entryDate = el.querySelector('.post-date');
|
|
32
|
-
const date = entryDate.textContent.trim();
|
|
33
|
-
|
|
34
|
-
if (all.findIndex((entry) => entry.url === url) === -1) {
|
|
35
|
-
entries.push({
|
|
36
|
-
date,
|
|
37
|
-
url,
|
|
38
|
-
});
|
|
39
|
-
}
|
|
40
|
-
});
|
|
41
|
-
return entries;
|
|
42
|
-
}
|
|
43
|
-
}
|