@adobe/helix-importer 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,207 @@
1
+ /*
2
+ * Copyright 2021 Adobe. All rights reserved.
3
+ * This file is licensed to you under the Apache License, Version 2.0 (the "License");
4
+ * you may not use this file except in compliance with the License. You may obtain a copy
5
+ * of the License at http://www.apache.org/licenses/LICENSE-2.0
6
+ *
7
+ * Unless required by applicable law or agreed to in writing, software distributed under
8
+ * the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR REPRESENTATIONS
9
+ * OF ANY KIND, either express or implied. See the License for the specific language
10
+ * governing permissions and limitations under the License.
11
+ */
12
+
13
+ import { JSDOM } from 'jsdom';
14
+
/**
 * Collection of static helpers that clean up a DOM document (or raw HTML string)
 * before it is converted to another format.
 *
 * All methods taking a `document` mutate it in place.
 */
export default class DOMUtils {
  /**
   * Tag names whose presence makes an otherwise text-less element worth keeping.
   */
  static EMPTY_TAGS_TO_PRESERVE = ['img', 'video', 'iframe', 'div', 'picture'];

  /**
   * Normalises every `tagName` element of the document in four passes:
   * 1. removes empty elements and turns non-breaking spaces into regular spaces,
   * 2. moves a lone space sitting between two `tagName` siblings into the first one,
   * 3. unwraps punctuation-only elements and merges adjacent `tagName` siblings,
   * 4. moves one leading / trailing space out of the element into a `<span> </span>`.
   *
   * @param {Document} document The document to clean up (mutated in place).
   * @param {string} tagName Tag name to review; it is used both as a selector and
   *   compared against the lower-cased tag name of siblings.
   */
  static reviewInlineElement(document, tagName) {
    const preserveSelector = DOMUtils.EMPTY_TAGS_TO_PRESERVE.join(',');
    // elements are always visited last-to-first, so descendants are handled before ancestors
    const selectReversed = () => [...document.querySelectorAll(tagName)].reverse();
    const isSameTag = (node) => Boolean(node?.tagName) && node.tagName.toLowerCase() === tagName;

    // pass 1: remove empty nodes, normalise non-breaking spaces
    selectReversed().forEach((tag) => {
      if (tag.textContent === '' && !tag.querySelector(preserveSelector)) {
        tag.remove();
        return;
      }
      // innerHTML serialises U+00A0 as the `&nbsp;` entity, so the entity has to be
      // matched explicitly; the raw character is matched as well for safety.
      const { innerHTML } = tag;
      const normalised = innerHTML.replace(/&nbsp;|\u00A0/g, ' ');
      if (normalised !== innerHTML) {
        // eslint-disable-next-line no-param-reassign
        tag.innerHTML = normalised;
      }
    });

    // pass 2: find <tag>x</tag> <tag>y</tag> and move the space inside the first tag
    selectReversed().forEach((tag) => {
      const separator = tag.nextSibling;
      if (separator && separator.textContent === ' ' && isSameTag(separator.nextSibling)) {
        separator.remove();
        tag.append(' ');
      }
    });

    // pass 3: unwrap punctuation-only elements and collapse consecutive <tag>
    const punctuationOnly = ['.', '. ', ':', ': '];
    selectReversed().forEach((tag) => {
      if (punctuationOnly.includes(tag.innerHTML)) {
        // keep the punctuation, drop the wrapping element
        tag.replaceWith(...tag.childNodes);
        return;
      }
      const previous = tag.previousSibling;
      // NOTE(review): a previous sibling without href absorbs the current element even
      // when the current element has one - kept as in the original, confirm it is intended.
      if (isSameTag(previous) && (!previous.href || previous.href === tag.href)) {
        // previous sibling is a <tag> too: merge the current one into it
        previous.append(...tag.childNodes);
        tag.remove();
      }
    });

    // pass 4: move one leading / trailing space into a dedicated span outside the element
    selectReversed().forEach((tag) => {
      let { innerHTML } = tag;
      if (!innerHTML) {
        return;
      }
      if (innerHTML.endsWith(' ')) {
        // eslint-disable-next-line no-param-reassign
        tag.innerHTML = innerHTML.slice(0, -1);
        ({ innerHTML } = tag);
        tag.after(JSDOM.fragment('<span> </span>'));
      }
      if (innerHTML.startsWith(' ')) {
        // eslint-disable-next-line no-param-reassign
        tag.innerHTML = innerHTML.slice(1);
        tag.before(JSDOM.fragment('<span> </span>'));
      }
    });
  }

  /**
   * Removes paragraphs that carry no content: their text is empty, made only of
   * whitespace (including non-breaking spaces) or the literal string `&nbsp;`,
   * and they contain none of the EMPTY_TAGS_TO_PRESERVE elements.
   *
   * A paragraph that merely starts with a non-breaking space but has real text
   * after it is kept.
   *
   * @param {Document} document The document to clean up (mutated in place).
   */
  static reviewParagraphs(document) {
    const preserveSelector = DOMUtils.EMPTY_TAGS_TO_PRESERVE.join(',');
    [...document.querySelectorAll('p')].reverse().forEach((tag) => {
      const text = tag.textContent;
      // String.prototype.trim() also strips U+00A0
      const isBlank = text === '&nbsp;' || text.trim() === '';
      if (isBlank && !tag.querySelector(preserveSelector)) {
        tag.remove();
      }
    });
  }

  /**
   * Prefixes every `~` of the body markup with a backslash.
   * Not idempotent: calling it twice escapes twice.
   *
   * @param {Document} document The document to update (body is re-parsed).
   */
  static escapeSpecialCharacters(document) {
    // eslint-disable-next-line no-param-reassign
    document.body.innerHTML = document.body.innerHTML.replace(/~/gm, '\\~');
  }

  /**
   * Strips `<strong>` wrappers (opening and closing tags, with or without
   * attributes) from all headings and removes headings left empty.
   *
   * @param {Document} document The document to clean up (mutated in place).
   */
  static reviewHeadings(document) {
    [...document.querySelectorAll('h1, h2, h3, h4, h5, h6')].reverse().forEach((tag) => {
      // remove useless strong tags
      // eslint-disable-next-line no-param-reassign
      tag.innerHTML = tag.innerHTML.replace(/<\/?strong(?:\s[^>]*)?>/g, '');
      if (tag.innerHTML === '') {
        tag.remove();
      }
    });
  }

  /**
   * Removes every element matching any of the given selectors.
   *
   * @param {Document} document The document to clean up (mutated in place).
   * @param {string[]} selectors CSS selectors of the elements to remove.
   */
  static remove(document, selectors) {
    selectors.forEach((selector) => {
      document.querySelectorAll(selector).forEach((node) => node.remove());
    });
  }

  /**
   * Removes HTML comments from the body markup.
   *
   * @param {Document} document The document to update (body is re-parsed).
   */
  static removeComments(document) {
    // eslint-disable-next-line no-param-reassign
    document.body.innerHTML = document.body.innerHTML
      // remove html comments
      .replace(/<!--(?!>)[\S\s]*?-->/gm, '');
  }

  /**
   * Unwraps all `<span>` elements, keeping their children in place.
   * Spans without text are removed unless they contain one of the
   * EMPTY_TAGS_TO_PRESERVE elements, in which case they are unwrapped too.
   *
   * Children are moved (not re-parsed), so nested spans stay attached to the
   * document and are unwrapped in turn.
   *
   * @param {Document} document The document to clean up (mutated in place).
   */
  static removeSpans(document) {
    const preserveSelector = DOMUtils.EMPTY_TAGS_TO_PRESERVE.join(',');
    document.querySelectorAll('span').forEach((span) => {
      if (span.textContent === '' && !span.querySelector(preserveSelector)) {
        span.remove();
      } else {
        span.replaceWith(...span.childNodes);
      }
    });
  }

  /**
   * Replaces each element matching one of the selectors by `<p><em>text</em></p>`
   * where `text` is the trimmed text content of the element.
   *
   * The caption is set as text (not parsed as HTML) so characters like `<` or `&`
   * are preserved literally.
   *
   * @param {Document} document The document to update (mutated in place).
   * @param {string[]} selectors CSS selectors of the caption elements.
   */
  static replaceByCaptions(document, selectors) {
    selectors.forEach((selector) => {
      document.querySelectorAll(selector).forEach((elem) => {
        const emphasis = document.createElement('em');
        emphasis.textContent = elem.textContent.trim();
        const paragraph = document.createElement('p');
        paragraph.append(emphasis);
        elem.replaceWith(paragraph);
      });
    });
  }

  /**
   * Builds an "Embed" table: a header cell `Embed` and one cell with a link to the url.
   *
   * The url is applied through DOM APIs instead of string interpolation so that
   * quotes, `&` and entity-like sequences in the url are not re-interpreted as markup.
   *
   * @param {string} url The url to embed.
   * @returns {DocumentFragment} Fragment containing the table.
   */
  static generateEmbed(url) {
    const fragment = JSDOM.fragment('<table><tr><th>Embed</th></tr><tr><td><a></a></td></tr></table>');
    const link = fragment.querySelector('a');
    link.setAttribute('href', url);
    link.textContent = url;
    return fragment;
  }

  /**
   * Replaces iframes by an "Embed" table (using `data-src`, falling back to `src`;
   * iframes without url are simply removed) and wraps videos in a "Video" table,
   * or an "Animation" table when the video has `autoplay`.
   *
   * @param {Document} document The document to update (mutated in place).
   */
  static replaceEmbeds(document) {
    document.querySelectorAll('iframe').forEach((iframe) => {
      const url = iframe.getAttribute('data-src') || iframe.getAttribute('src');
      if (url) {
        iframe.replaceWith(DOMUtils.generateEmbed(url));
      } else {
        iframe.remove();
      }
    });

    document.querySelectorAll('video').forEach((video) => {
      const blockType = video.autoplay ? 'Animation' : 'Video';
      const block = JSDOM.fragment(`<table><tr><th>${blockType}</th></tr><tr><td>${video.outerHTML}</td></tr></table>`);
      video.replaceWith(block);
    });
  }

  /**
   * Removes `<noscript>` elements from an HTML string.
   * Matching is case-insensitive, tolerates attributes on the opening tag and
   * any character (including `\r` and `\n`) in the content.
   *
   * @param {string} html The HTML source.
   * @returns {string} The HTML without noscript elements.
   */
  static removeNoscripts(html) {
    return html.replace(/<noscript\b[^>]*>[\s\S]*?<\/noscript\s*>/gi, '');
  }

  /**
   * Escapes pipes in the title of images located inside a table.
   * Not idempotent: calling it twice escapes twice.
   *
   * @param {Document} document The document to update (mutated in place).
   */
  static encodeImagesForTable(document) {
    document.querySelectorAll('img').forEach((img) => {
      // only images inside a table are concerned
      if (img.closest('table') && img.title && img.title.includes('|')) {
        // pipes in title do not get encoded
        // eslint-disable-next-line no-param-reassign
        img.title = img.title.replace(/\|/gm, '\\|');
      }
    });
  }
}
@@ -0,0 +1,25 @@
1
+ /*
2
+ * Copyright 2021 Adobe. All rights reserved.
3
+ * This file is licensed to you under the Apache License, Version 2.0 (the "License");
4
+ * you may not use this file except in compliance with the License. You may obtain a copy
5
+ * of the License at http://www.apache.org/licenses/LICENSE-2.0
6
+ *
7
+ * Unless required by applicable law or agreed to in writing, software distributed under
8
+ * the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR REPRESENTATIONS
9
+ * OF ANY KIND, either express or implied. See the License for the specific language
10
+ * governing permissions and limitations under the License.
11
+ */
12
+
13
+ import sanitize from 'sanitize-filename';
14
+
/**
 * File name related helpers.
 */
export default class FileUtils {
  /**
   * Turns a (possibly URI-encoded) name into a lower-case, dash-separated file name.
   *
   * Steps: URI-decode, pass through `sanitize` (imported from 'sanitize-filename'),
   * trim, lower-case, drop `.` and `&`, replace each whitespace character by `-`
   * and collapse runs of dashes.
   *
   * @param {string} name The raw name, for instance the last segment of a url.
   * @returns {string} The sanitized name.
   */
  static sanitizeFilename(name) {
    let decoded;
    try {
      decoded = decodeURIComponent(name);
    } catch (error) {
      if (!(error instanceof URIError)) {
        throw error;
      }
      // a literal "%" that is not part of a valid escape sequence (e.g. "100%.png")
      // makes decodeURIComponent throw: fall back to the name as given
      decoded = String(name);
    }

    return sanitize(decoded)
      .trim()
      .toLowerCase()
      .replace(/\./gm, '')
      .replace(/&/gm, '')
      .replace(/\s/g, '-')
      .replace(/-{2,}/g, '-');
  }
}
@@ -0,0 +1,20 @@
1
+ /*
2
+ * Copyright 2021 Adobe. All rights reserved.
3
+ * This file is licensed to you under the Apache License, Version 2.0 (the "License");
4
+ * you may not use this file except in compliance with the License. You may obtain a copy
5
+ * of the License at http://www.apache.org/licenses/LICENSE-2.0
6
+ *
7
+ * Unless required by applicable law or agreed to in writing, software distributed under
8
+ * the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR REPRESENTATIONS
9
+ * OF ANY KIND, either express or implied. See the License for the specific language
10
+ * governing permissions and limitations under the License.
11
+ */
12
+
/**
 * Generic helpers.
 */
export default class Utils {
  /**
   * Sequential, awaitable counterpart of `Array.prototype.forEach`: the callback
   * for one element is awaited before the next element is visited.
   *
   * @param {Array} array The elements to iterate over.
   * @param {Function} callback Invoked (and awaited) with `(element, index, array)`.
   * @returns {Promise<void>} Resolves once the callback has completed for every element.
   */
  static async asyncForEach(array, callback) {
    let position = 0;
    while (position < array.length) {
      // sequential execution is the whole point here
      // eslint-disable-next-line no-await-in-loop
      await callback(array[position], position, array);
      position += 1;
    }
  }
}
@@ -0,0 +1,68 @@
1
+ /*
2
+ * Copyright 2021 Adobe. All rights reserved.
3
+ * This file is licensed to you under the Apache License, Version 2.0 (the "License");
4
+ * you may not use this file except in compliance with the License. You may obtain a copy
5
+ * of the License at http://www.apache.org/licenses/LICENSE-2.0
6
+ *
7
+ * Unless required by applicable law or agreed to in writing, software distributed under
8
+ * the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR REPRESENTATIONS
9
+ * OF ANY KIND, either express or implied. See the License for the specific language
10
+ * governing permissions and limitations under the License.
11
+ */
12
+ import { JSDOM } from 'jsdom';
13
+
14
+ import DOMUtils from '../utils/DOMUtils.js';
15
+
/**
 * DOM clean-up helpers targeting WordPress generated markup.
 */
export default class WPUtils {
  /**
   * Converts captions into `<p><em>caption</em></p>`:
   * - elements matching `.wp-caption-text` or `figcaption` (via DOMUtils.replaceByCaptions),
   * - an `<h5>` directly following an image paragraph, an image or a video.
   *
   * @param {Document} document The document to update (mutated in place).
   */
  static handleCaptions(document) {
    DOMUtils.replaceByCaptions(document, ['.wp-caption-text', 'figcaption']);

    const isH5 = (node) => Boolean(node) && node.tagName === 'H5';

    // an h5 following an image / video is a caption
    document.querySelectorAll('p img, video').forEach((item) => {
      // the sibling of the parent wins over the sibling of the item itself
      const heading = [item.parentNode.nextElementSibling, item.nextElementSibling].find(isH5);
      if (!heading) {
        return;
      }
      // built with DOM APIs: the caption is plain text and must not be parsed as HTML
      const emphasis = document.createElement('em');
      emphasis.textContent = heading.textContent.trim();
      const caption = document.createElement('p');
      caption.append(emphasis);
      heading.replaceWith(caption);
    });
  }

  /**
   * Generic clean-up:
   * - `<a><strong>text</strong></a>` becomes `<strong><a>text</a></strong>` when the
   *   strong element is the only child and its text contains `.` or `:`,
   * - images located in headings are moved right after the heading,
   * - headings are flattened to their plain text.
   *
   * @param {Document} document The document to update (mutated in place).
   */
  static genericDOMCleanup(document) {
    const headingSelector = 'h1, h2, h3, h4, h5, h6';

    // extract "emphasis" from links
    // see https://github.com/adobe/helix-pipeline/issues/895
    document.querySelectorAll('a strong').forEach((elem) => {
      const parent = elem.parentNode;
      // only cover case with 1 child
      if (parent.childNodes.length !== 1) {
        return;
      }
      const txt = elem.textContent;
      // only treat links
      if (txt && (txt.includes('.') || txt.includes(':'))) {
        // eslint-disable-next-line no-param-reassign
        elem.textContent = '';
        // take out of parent, then swap the nesting
        parent.parentNode.insertBefore(elem, parent.nextSibling);
        elem.appendChild(parent);
        // txt is plain text: assign it as text so that "<" or "&" are kept literally
        parent.textContent = txt;
      }
    });

    // some images are in headings...
    // visited last-to-first so that several images of one heading keep their order
    [...document.querySelectorAll('h1 img, h2 img, h3 img, h4 img, h5 img, h6 img')]
      .reverse()
      .forEach((img) => {
        // move image after its heading, even when it is nested deeper than a direct
        // child - otherwise the flattening below would drop it
        img.closest(headingSelector).after(img);
      });

    // heading could be full of tags
    document.querySelectorAll(headingSelector).forEach((h) => {
      // eslint-disable-next-line no-param-reassign
      h.textContent = h.textContent;
    });
  }
}
@@ -0,0 +1,51 @@
1
+ /*
2
+ * Copyright 2021 Adobe. All rights reserved.
3
+ * This file is licensed to you under the Apache License, Version 2.0 (the "License");
4
+ * you may not use this file except in compliance with the License. You may obtain a copy
5
+ * of the License at http://www.apache.org/licenses/LICENSE-2.0
6
+ *
7
+ * Unless required by applicable law or agreed to in writing, software distributed under
8
+ * the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR REPRESENTATIONS
9
+ * OF ANY KIND, either express or implied. See the License for the specific language
10
+ * governing permissions and limitations under the License.
11
+ */
12
+
13
+ import FormData from 'form-data';
14
+ import fetch from 'node-fetch';
15
+ import PagingExplorer from '../../explorer/PagingExplorer.js';
16
+
// path of the endpoint, appended as-is to the configured url
const API = 'wp-admin/admin-ajax.php';

/**
 * Paging explorer fetching pages of cards through the `admin-ajax.php` endpoint.
 */
export default class WPAdminAjaxPager extends PagingExplorer {
  /**
   * Requests one page of cards with a POST carrying the form fields
   * `action=cardsFilter`, `filterBy=latest` and `paged=<page>`.
   *
   * NOTE(review): the endpoint is concatenated to `this.params.url` without a
   * separator, so the url presumably has to end with "/" - confirm with callers.
   *
   * @param {number} page The page number to request.
   * @returns {Promise<Response>} The response of the request.
   */
  async fetch(page) {
    const api = `${this.params.url}${API}`;
    const form = new FormData();
    form.append('action', 'cardsFilter');
    form.append('filterBy', 'latest');
    form.append('paged', `${page}`);
    return fetch(api, {
      method: 'POST',
      body: form,
    });
  }

  /**
   * Extracts `{ date, url }` entries from the `.card-item` elements of a page,
   * skipping urls already present in `all`.
   *
   * Cards without an `h4 a` link are ignored and a missing `.date` element gives
   * an empty date, so one malformed card does not abort the whole page.
   *
   * @param {Document} document The document of the fetched page.
   * @param {Array<{url: string}>} all Entries collected from the previous pages.
   * @returns {Array<{date: string, url: string}>} The new entries found in the page.
   */
  // eslint-disable-next-line class-methods-use-this
  process(document, all) {
    const entries = [];
    document.querySelectorAll('.card-item').forEach((el) => {
      const link = el.querySelector('h4 a');
      if (!link) {
        return;
      }
      const url = link.getAttribute('href');

      const entryDate = el.querySelector('.date');
      const date = entryDate ? entryDate.textContent.trim() : '';

      if (!all.some((entry) => entry.url === url)) {
        entries.push({
          date,
          url,
        });
      }
    });
    return entries;
  }
}
@@ -0,0 +1,48 @@
1
+ /*
2
+ * Copyright 2021 Adobe. All rights reserved.
3
+ * This file is licensed to you under the Apache License, Version 2.0 (the "License");
4
+ * you may not use this file except in compliance with the License. You may obtain a copy
5
+ * of the License at http://www.apache.org/licenses/LICENSE-2.0
6
+ *
7
+ * Unless required by applicable law or agreed to in writing, software distributed under
8
+ * the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR REPRESENTATIONS
9
+ * OF ANY KIND, either express or implied. See the License for the specific language
10
+ * governing permissions and limitations under the License.
11
+ */
12
+
13
+ import fetch from 'node-fetch';
14
+ import PagingExplorer from '../../explorer/PagingExplorer.js';
15
+
// path segment placed between the configured url and the page number
const API = 'page/';

/**
 * Paging explorer reading entries from `<url>page/<n>` listing pages.
 */
export default class WPContentPager extends PagingExplorer {
  /**
   * Requests the listing page with the given number.
   *
   * @param {number} page The page number to request.
   * @returns {Promise<Response>} The response of the request.
   */
  async fetch(page) {
    return fetch(`${this.params.url}${API}${page}`);
  }

  /**
   * Collects `{ date, url }` for each entry of the page having an `h2 a` link
   * and whose url is not yet part of `all`. The date is the trimmed text of the
   * `.date` element, else of the `.entry_footer` element, else an empty string.
   *
   * @param {Document} document The document of the fetched page.
   * @param {Array<{url: string}>} all Entries collected from the previous pages.
   * @returns {Array<{date: string, url: string}>} The new entries found in the page.
   */
  // eslint-disable-next-line class-methods-use-this
  process(document, all) {
    const found = [];
    const candidates = document.querySelectorAll('main .content .entry, main .entries .entry, article .entries .entry');
    candidates.forEach((candidate) => {
      const anchor = candidate.querySelector('h2 a');
      if (!anchor) {
        // not a real entry
        return;
      }
      const url = anchor.getAttribute('href');

      const dateHolder = candidate.querySelector('.date') || candidate.querySelector('.entry_footer');
      const date = dateHolder ? dateHolder.textContent.trim() : '';

      const alreadyKnown = all.some((known) => known.url === url);
      if (!alreadyKnown) {
        found.push({ date, url });
      }
    });
    return found;
  }
}
@@ -0,0 +1,43 @@
1
+ /*
2
+ * Copyright 2021 Adobe. All rights reserved.
3
+ * This file is licensed to you under the Apache License, Version 2.0 (the "License");
4
+ * you may not use this file except in compliance with the License. You may obtain a copy
5
+ * of the License at http://www.apache.org/licenses/LICENSE-2.0
6
+ *
7
+ * Unless required by applicable law or agreed to in writing, software distributed under
8
+ * the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR REPRESENTATIONS
9
+ * OF ANY KIND, either express or implied. See the License for the specific language
10
+ * governing permissions and limitations under the License.
11
+ */
12
+
13
+ import fetch from 'node-fetch';
14
+ import PagingExplorer from '../../explorer/PagingExplorer.js';
15
+
// path segment placed between the configured url and the page number
const API = 'page/';

/**
 * Paging explorer reading `.post-meta-wrap` entries from `<url>page/<n>` listing pages.
 */
export default class WPPostWrapPager extends PagingExplorer {
  /**
   * Requests the listing page with the given number.
   *
   * @param {number} page The page number to request.
   * @returns {Promise<Response>} The response of the request.
   */
  async fetch(page) {
    const api = `${this.params.url}${API}${page}`;
    return fetch(api);
  }

  /**
   * Extracts `{ date, url }` entries from the `.post-meta-wrap` elements of a page,
   * skipping urls already present in `all`.
   *
   * Elements without a `.post-item > a` link are ignored and a missing `.post-date`
   * element gives an empty date, so one malformed element does not abort the whole page.
   *
   * @param {Document} document The document of the fetched page.
   * @param {Array<{url: string}>} all Entries collected from the previous pages.
   * @returns {Array<{date: string, url: string}>} The new entries found in the page.
   */
  // eslint-disable-next-line class-methods-use-this
  process(document, all) {
    const entries = [];
    document.querySelectorAll('.post-meta-wrap').forEach((el) => {
      const link = el.querySelector('.post-item > a');
      if (!link) {
        return;
      }
      const url = link.getAttribute('href');

      const entryDate = el.querySelector('.post-date');
      const date = entryDate ? entryDate.textContent.trim() : '';

      if (!all.some((entry) => entry.url === url)) {
        entries.push({
          date,
          url,
        });
      }
    });
    return entries;
  }
}
@@ -0,0 +1,205 @@
1
+ /*
2
+ * Copyright 2020 Adobe. All rights reserved.
3
+ * This file is licensed to you under the Apache License, Version 2.0 (the "License");
4
+ * you may not use this file except in compliance with the License. You may obtain a copy
5
+ * of the License at http://www.apache.org/licenses/LICENSE-2.0
6
+ *
7
+ * Unless required by applicable law or agreed to in writing, software distributed under
8
+ * the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR REPRESENTATIONS
9
+ * OF ANY KIND, either express or implied. See the License for the specific language
10
+ * governing permissions and limitations under the License.
11
+ */
12
+
13
+ /* eslint-disable max-classes-per-file, class-methods-use-this */
14
+
15
+ import { deepStrictEqual, strictEqual } from 'assert';
16
+ import { describe, it } from 'mocha';
17
+
18
+ import { Response } from 'node-fetch';
19
+
20
+ import PagingExplorer from '../../src/explorer/PagingExplorer.js';
21
+
// Each test subclasses PagingExplorer with stubbed fetch / process implementations
// and drives it through explore() with a maximum of 3 pages.
describe('PagingExplorer tests', () => {
  const params = {
    url: 'testdest',
    nbMaxPages: 3,
  };

  it('fetch and process are called 3 times if end not reached', async () => {
    let fetchCalled = 0;
    let processCalled = 0;

    class Test extends PagingExplorer {
      async fetch() {
        fetchCalled += 1;
        return new Response('test');
      }

      process() {
        processCalled += 1;
        return [{
          a: 1,
        }];
      }
    }

    const se = new Test(params);
    await se.explore();

    strictEqual(fetchCalled, 3, 'fetch is called 3 times');
    strictEqual(processCalled, 3, 'process is called 3 times');
  });

  it('fetch stops the process when reaches the end', async () => {
    let fetchCalled = 0;
    let processCalled = 0;

    class Test extends PagingExplorer {
      async fetch() {
        fetchCalled += 1;
        // second page does not exist
        if (fetchCalled > 1) {
          return new Response('reached the end', { status: 404 });
        }
        return new Response('test');
      }

      process() {
        processCalled += 1;
        return [{
          a: 1,
        }];
      }
    }

    const se = new Test(params);
    await se.explore();

    strictEqual(fetchCalled, 2, 'fetch is called 2 times');
    strictEqual(processCalled, 1, 'process is called 1 time');
  });

  it('explore returns the expected result set', async () => {
    let processCalled = 0;
    class Test extends PagingExplorer {
      async fetch() {
        return new Response('test');
      }

      process() {
        processCalled += 1;
        return [{
          a: processCalled,
        }];
      }
    }

    const se = new Test(params);
    const results = await se.explore();

    deepStrictEqual(results, [{ a: 1 }, { a: 2 }, { a: 3 }], 'result is correct');
  });

  it('explore returns the expected result set when number of pages is not the max one', async () => {
    let fetchCalled = 0;
    let processCalled = 0;
    class Test extends PagingExplorer {
      async fetch() {
        fetchCalled += 1;
        // third page does not exist
        if (fetchCalled > 2) {
          return new Response('reached the end', { status: 404 });
        }
        return new Response('test');
      }

      process() {
        processCalled += 1;
        return [{
          a: processCalled,
        }];
      }
    }

    const se = new Test(params);
    const results = await se.explore();

    deepStrictEqual(results, [{ a: 1 }, { a: 2 }], 'result is correct');
  });

  it('explore, fetch and process can be used to retrieve multipage results', async () => {
    let fetchCalled = 0;
    class Test extends PagingExplorer {
      async fetch() {
        fetchCalled += 1;
        // third page does not exist
        if (fetchCalled > 2) {
          return new Response('reached the end', { status: 404 });
        }
        return new Response(`<html>
          <body>
            <a href="a${fetchCalled}.html">a${fetchCalled}</a>
            <a href="b${fetchCalled}.html">b${fetchCalled}</a>
            <a href="c${fetchCalled}.html">c${fetchCalled}</a>
          </body>
        </html>`);
      }

      process(document) {
        const entries = [];
        document.querySelectorAll('a').forEach((el) => {
          entries.push({
            link: el.getAttribute('href'),
          });
        });
        return entries;
      }
    }

    const se = new Test(params);
    const results = await se.explore();

    deepStrictEqual(results, [
      { link: 'a1.html' },
      { link: 'b1.html' },
      { link: 'c1.html' },
      { link: 'a2.html' },
      { link: 'b2.html' },
      { link: 'c2.html' },
    ], 'result is correct');
  });

  it('process receives the entry set from previous pages', async () => {
    let fetchCalled = 0;
    class Test extends PagingExplorer {
      async fetch() {
        fetchCalled += 1;
        return new Response(`<html>
          <body>
            <a href="a${fetchCalled}.html">a${fetchCalled}</a>
            <a href="b${fetchCalled}.html">b${fetchCalled}</a>
            <a href="c${fetchCalled}.html">c${fetchCalled}</a>
          </body>
        </html>`);
      }

      process(document, all) {
        // rebuild what the previous pages (1 .. fetchCalled - 1) must have produced
        const testResult = [];
        for (let i = 1; i < fetchCalled; i += 1) {
          testResult.push({ link: `a${i}.html` });
          testResult.push({ link: `b${i}.html` });
          testResult.push({ link: `c${i}.html` });
        }
        deepStrictEqual(all, testResult, 'all entries argument contains previous entries from previous pages');

        const entries = [];
        document.querySelectorAll('a').forEach((el) => {
          entries.push({
            link: el.getAttribute('href'),
          });
        });
        return entries;
      }
    }

    const se = new Test(params);
    await se.explore();
  });
});