@govtechsg/oobee 0.10.85 → 0.10.86
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.github/workflows/image.yml +3 -2
- package/.github/workflows/publish.yml +10 -0
- package/DETAILS.md +29 -0
- package/dist/combine.js +1 -1
- package/dist/constants/common.js +15 -4
- package/dist/constants/constants.js +604 -1
- package/dist/crawlers/commonCrawlerFunc.js +3 -2
- package/dist/crawlers/crawlSitemap.js +98 -80
- package/dist/crawlers/custom/utils.js +137 -31
- package/dist/crawlers/guards/urlGuard.js +8 -15
- package/dist/crawlers/runCustom.js +18 -11
- package/dist/generateOobeeClientScanner.js +570 -0
- package/dist/mergeAxeResults.js +5 -4
- package/dist/npmIndex.js +10 -2
- package/dist/proxyService.js +18 -3
- package/dist/services/s3Uploader.js +21 -10
- package/dist/static/ejs/partials/scripts/header/aboutScanModal/ScanConfiguration.ejs +2 -2
- package/dist/static/ejs/partials/scripts/ruleModal/constants.ejs +1 -761
- package/dist/static/ejs/summary.ejs +10 -5
- package/oobee-client-scanner.js +34992 -0
- package/package.json +2 -2
- package/src/combine.ts +3 -1
- package/src/constants/common.ts +22 -10
- package/src/constants/constants.ts +602 -1
- package/src/crawlers/commonCrawlerFunc.ts +4 -3
- package/src/crawlers/crawlSitemap.ts +116 -98
- package/src/crawlers/custom/utils.ts +143 -38
- package/src/crawlers/guards/urlGuard.ts +24 -31
- package/src/crawlers/runCustom.ts +29 -11
- package/src/generateOobeeClientScanner.ts +591 -0
- package/src/mergeAxeResults.ts +5 -3
- package/src/npmIndex.ts +12 -2
- package/src/proxyService.ts +25 -4
- package/src/services/s3Uploader.ts +23 -11
- package/src/static/ejs/partials/scripts/header/aboutScanModal/ScanConfiguration.ejs +2 -2
- package/src/static/ejs/partials/scripts/ruleModal/constants.ejs +1 -761
- package/src/static/ejs/summary.ejs +10 -5
- package/testStaticJSScanner.html +534 -0
|
@@ -116,7 +116,7 @@ export const filterAxeResults = (results, pageTitle, customFlowDetails) => {
|
|
|
116
116
|
return;
|
|
117
117
|
const conformance = tags.filter(tag => tag.startsWith('wcag') || tag === 'best-practice');
|
|
118
118
|
nodes.forEach(node => {
|
|
119
|
-
const { html } = node;
|
|
119
|
+
const { html, target } = node;
|
|
120
120
|
if (!(rule in passed.rules)) {
|
|
121
121
|
passed.rules[rule] = {
|
|
122
122
|
description,
|
|
@@ -128,7 +128,8 @@ export const filterAxeResults = (results, pageTitle, customFlowDetails) => {
|
|
|
128
128
|
};
|
|
129
129
|
}
|
|
130
130
|
const finalHtml = truncateHtml(html);
|
|
131
|
-
|
|
131
|
+
const xpath = target.length === 1 && typeof target[0] === 'string' ? target[0] : undefined;
|
|
132
|
+
passed.rules[rule].items.push({ html: finalHtml, screenshotPath: '', message: '', xpath: xpath || '' });
|
|
132
133
|
passed.totalItems += 1;
|
|
133
134
|
passed.rules[rule].totalItems += 1;
|
|
134
135
|
totalItems += 1;
|
|
@@ -12,6 +12,7 @@ const crawlSitemap = async ({ sitemapUrl, randomToken, host, viewportSettings, m
|
|
|
12
12
|
let dataset;
|
|
13
13
|
let urlsCrawled;
|
|
14
14
|
let durationExceeded = false;
|
|
15
|
+
let isAbortingScan = false;
|
|
15
16
|
if (fromCrawlIntelligentSitemap) {
|
|
16
17
|
dataset = datasetFromIntelligent;
|
|
17
18
|
urlsCrawled = urlsCrawledFromIntelligent;
|
|
@@ -144,106 +145,123 @@ const crawlSitemap = async ({ sitemapUrl, randomToken, host, viewportSettings, m
|
|
|
144
145
|
});
|
|
145
146
|
return;
|
|
146
147
|
}
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
if (hasExceededDuration) {
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
|
|
158
|
-
if (request.skipNavigation && actualUrl === 'about:blank') {
|
|
159
|
-
if (isScanPdfs) {
|
|
160
|
-
// pushes download promise into pdfDownloads
|
|
161
|
-
const { pdfFileName, url } = handlePdfDownload(randomToken, pdfDownloads, request, sendRequest, urlsCrawled);
|
|
162
|
-
uuidToPdfMapping[pdfFileName] = url;
|
|
148
|
+
try {
|
|
149
|
+
await waitForPageLoaded(page, 10000);
|
|
150
|
+
const actualUrl = page.url() || request.loadedUrl || request.url;
|
|
151
|
+
const hasExceededDuration = scanDuration > 0 && Date.now() - crawlStartTime > scanDuration * 1000;
|
|
152
|
+
if (urlsCrawled.scanned.length >= maxRequestsPerCrawl || hasExceededDuration) {
|
|
153
|
+
isAbortingScan = true;
|
|
154
|
+
if (hasExceededDuration) {
|
|
155
|
+
console.log(`Crawl duration of ${scanDuration}s exceeded. Aborting sitemap crawl.`);
|
|
156
|
+
durationExceeded = true;
|
|
157
|
+
}
|
|
158
|
+
crawler.autoscaledPool.abort(); // stops new requests
|
|
163
159
|
return;
|
|
164
160
|
}
|
|
165
|
-
|
|
166
|
-
|
|
167
|
-
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
|
|
172
|
-
|
|
173
|
-
|
|
174
|
-
|
|
175
|
-
});
|
|
176
|
-
return;
|
|
177
|
-
}
|
|
178
|
-
const contentType = response?.headers?.()['content-type'] || '';
|
|
179
|
-
const status = response ? response.status() : 0;
|
|
180
|
-
if (isScanHtml && status < 300 && isWhitelistedContentType(contentType)) {
|
|
181
|
-
const isRedirected = !areLinksEqual(page.url(), request.url);
|
|
182
|
-
const isLoadedUrlInCrawledUrls = urlsCrawled.scanned.some(item => (item.actualUrl || item.url) === page.url());
|
|
183
|
-
if (isRedirected && isLoadedUrlInCrawledUrls) {
|
|
184
|
-
urlsCrawled.notScannedRedirects.push({
|
|
185
|
-
fromUrl: request.url,
|
|
186
|
-
toUrl: actualUrl, // i.e. actualUrl
|
|
161
|
+
if (request.skipNavigation && actualUrl === 'about:blank') {
|
|
162
|
+
if (isScanPdfs) {
|
|
163
|
+
// pushes download promise into pdfDownloads
|
|
164
|
+
const { pdfFileName, url } = handlePdfDownload(randomToken, pdfDownloads, request, sendRequest, urlsCrawled);
|
|
165
|
+
uuidToPdfMapping[pdfFileName] = url;
|
|
166
|
+
return;
|
|
167
|
+
}
|
|
168
|
+
guiInfoLog(guiInfoStatusTypes.SKIPPED, {
|
|
169
|
+
numScanned: urlsCrawled.scanned.length,
|
|
170
|
+
urlScanned: request.url,
|
|
187
171
|
});
|
|
188
|
-
return;
|
|
189
|
-
}
|
|
190
|
-
// This logic is different from crawlDomain, as it also checks if the page is redirected before checking if it is excluded using exclusions.txt
|
|
191
|
-
if (isRedirected && blacklistedPatterns && isSkippedUrl(actualUrl, blacklistedPatterns)) {
|
|
192
172
|
urlsCrawled.userExcluded.push({
|
|
193
173
|
url: request.url,
|
|
194
174
|
pageTitle: request.url,
|
|
195
|
-
actualUrl,
|
|
196
|
-
metadata: STATUS_CODE_METADATA[
|
|
197
|
-
httpStatusCode:
|
|
175
|
+
actualUrl: request.url, // because about:blank is not useful
|
|
176
|
+
metadata: STATUS_CODE_METADATA[1],
|
|
177
|
+
httpStatusCode: 1,
|
|
198
178
|
});
|
|
179
|
+
return;
|
|
180
|
+
}
|
|
181
|
+
const contentType = response?.headers?.()['content-type'] || '';
|
|
182
|
+
const status = response ? response.status() : 0;
|
|
183
|
+
if (isScanHtml && status < 300 && isWhitelistedContentType(contentType)) {
|
|
184
|
+
const isRedirected = !areLinksEqual(page.url(), request.url);
|
|
185
|
+
const isLoadedUrlInCrawledUrls = urlsCrawled.scanned.some(item => (item.actualUrl || item.url) === page.url());
|
|
186
|
+
if (isRedirected && isLoadedUrlInCrawledUrls) {
|
|
187
|
+
urlsCrawled.notScannedRedirects.push({
|
|
188
|
+
fromUrl: request.url,
|
|
189
|
+
toUrl: actualUrl, // i.e. actualUrl
|
|
190
|
+
});
|
|
191
|
+
return;
|
|
192
|
+
}
|
|
193
|
+
// This logic is different from crawlDomain, as it also checks if the page is redirected before checking if it is excluded using exclusions.txt
|
|
194
|
+
if (isRedirected && blacklistedPatterns && isSkippedUrl(actualUrl, blacklistedPatterns)) {
|
|
195
|
+
urlsCrawled.userExcluded.push({
|
|
196
|
+
url: request.url,
|
|
197
|
+
pageTitle: request.url,
|
|
198
|
+
actualUrl,
|
|
199
|
+
metadata: STATUS_CODE_METADATA[0],
|
|
200
|
+
httpStatusCode: 0,
|
|
201
|
+
});
|
|
202
|
+
guiInfoLog(guiInfoStatusTypes.SKIPPED, {
|
|
203
|
+
numScanned: urlsCrawled.scanned.length,
|
|
204
|
+
urlScanned: request.url,
|
|
205
|
+
});
|
|
206
|
+
return;
|
|
207
|
+
}
|
|
208
|
+
const results = await runAxeScript({ includeScreenshots, page, randomToken });
|
|
209
|
+
guiInfoLog(guiInfoStatusTypes.SCANNED, {
|
|
210
|
+
numScanned: urlsCrawled.scanned.length,
|
|
211
|
+
urlScanned: request.url,
|
|
212
|
+
});
|
|
213
|
+
urlsCrawled.scanned.push({
|
|
214
|
+
url: request.url,
|
|
215
|
+
pageTitle: results.pageTitle,
|
|
216
|
+
actualUrl, // i.e. actualUrl
|
|
217
|
+
});
|
|
218
|
+
urlsCrawled.scannedRedirects.push({
|
|
219
|
+
fromUrl: request.url,
|
|
220
|
+
toUrl: actualUrl,
|
|
221
|
+
});
|
|
222
|
+
results.url = request.url;
|
|
223
|
+
results.actualUrl = actualUrl;
|
|
224
|
+
await dataset.pushData(results);
|
|
225
|
+
}
|
|
226
|
+
else {
|
|
199
227
|
guiInfoLog(guiInfoStatusTypes.SKIPPED, {
|
|
200
228
|
numScanned: urlsCrawled.scanned.length,
|
|
201
229
|
urlScanned: request.url,
|
|
202
230
|
});
|
|
203
|
-
|
|
231
|
+
if (isScanHtml) {
|
|
232
|
+
// carry through the HTTP status metadata
|
|
233
|
+
const status = response?.status();
|
|
234
|
+
const metadata = typeof status === 'number'
|
|
235
|
+
? STATUS_CODE_METADATA[status] || STATUS_CODE_METADATA[599]
|
|
236
|
+
: STATUS_CODE_METADATA[2];
|
|
237
|
+
urlsCrawled.invalid.push({
|
|
238
|
+
actualUrl,
|
|
239
|
+
url: request.url,
|
|
240
|
+
pageTitle: request.url,
|
|
241
|
+
metadata,
|
|
242
|
+
httpStatusCode: typeof status === 'number' ? status : 0,
|
|
243
|
+
});
|
|
244
|
+
}
|
|
204
245
|
}
|
|
205
|
-
const results = await runAxeScript({ includeScreenshots, page, randomToken });
|
|
206
|
-
guiInfoLog(guiInfoStatusTypes.SCANNED, {
|
|
207
|
-
numScanned: urlsCrawled.scanned.length,
|
|
208
|
-
urlScanned: request.url,
|
|
209
|
-
});
|
|
210
|
-
urlsCrawled.scanned.push({
|
|
211
|
-
url: request.url,
|
|
212
|
-
pageTitle: results.pageTitle,
|
|
213
|
-
actualUrl, // i.e. actualUrl
|
|
214
|
-
});
|
|
215
|
-
urlsCrawled.scannedRedirects.push({
|
|
216
|
-
fromUrl: request.url,
|
|
217
|
-
toUrl: actualUrl,
|
|
218
|
-
});
|
|
219
|
-
results.url = request.url;
|
|
220
|
-
results.actualUrl = actualUrl;
|
|
221
|
-
await dataset.pushData(results);
|
|
222
246
|
}
|
|
223
|
-
|
|
224
|
-
|
|
225
|
-
|
|
226
|
-
|
|
227
|
-
|
|
228
|
-
|
|
229
|
-
|
|
230
|
-
const status = response?.status();
|
|
231
|
-
const metadata = typeof status === 'number'
|
|
232
|
-
? STATUS_CODE_METADATA[status] || STATUS_CODE_METADATA[599]
|
|
233
|
-
: STATUS_CODE_METADATA[2];
|
|
234
|
-
urlsCrawled.invalid.push({
|
|
235
|
-
actualUrl,
|
|
247
|
+
catch (e) {
|
|
248
|
+
if (!isAbortingScan) {
|
|
249
|
+
guiInfoLog(guiInfoStatusTypes.ERROR, {
|
|
250
|
+
numScanned: urlsCrawled.scanned.length,
|
|
251
|
+
urlScanned: request.url,
|
|
252
|
+
});
|
|
253
|
+
urlsCrawled.error.push({
|
|
236
254
|
url: request.url,
|
|
237
255
|
pageTitle: request.url,
|
|
238
|
-
|
|
239
|
-
|
|
256
|
+
actualUrl: request.url,
|
|
257
|
+
metadata: STATUS_CODE_METADATA[2],
|
|
258
|
+
httpStatusCode: 0,
|
|
240
259
|
});
|
|
241
260
|
}
|
|
242
261
|
}
|
|
243
262
|
},
|
|
244
263
|
failedRequestHandler: async ({ request, response, error }) => {
|
|
245
|
-
|
|
246
|
-
if (urlsCrawled.scanned.length >= maxRequestsPerCrawl) {
|
|
264
|
+
if (isAbortingScan) {
|
|
247
265
|
return;
|
|
248
266
|
}
|
|
249
267
|
guiInfoLog(guiInfoStatusTypes.ERROR, {
|
|
@@ -62,14 +62,19 @@ export const screenshotFullPage = async (page, screenshotsDir, screenshotIdx) =>
|
|
|
62
62
|
await page.evaluate(() => {
|
|
63
63
|
window.scrollTo(0, document.body.scrollHeight);
|
|
64
64
|
});
|
|
65
|
-
const isLoadMoreContent = async () =>
|
|
66
|
-
|
|
65
|
+
const isLoadMoreContent = async () => {
|
|
66
|
+
await new Promise(resolve => setTimeout(resolve, 2500));
|
|
67
|
+
if (page.isClosed())
|
|
68
|
+
return false;
|
|
69
|
+
try {
|
|
67
70
|
await page.waitForLoadState('domcontentloaded');
|
|
68
71
|
const newHeight = await page.evaluate(() => document.body.scrollHeight);
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
72
|
+
return newHeight > prevHeight;
|
|
73
|
+
}
|
|
74
|
+
catch {
|
|
75
|
+
return false;
|
|
76
|
+
}
|
|
77
|
+
};
|
|
73
78
|
const result = await isLoadMoreContent();
|
|
74
79
|
return result;
|
|
75
80
|
};
|
|
@@ -291,22 +296,60 @@ export const addOverlayMenu = async (page, urlsCrawled, menuPos, opts = {
|
|
|
291
296
|
const h2 = document.createElement('h2');
|
|
292
297
|
h2.id = 'oobeeHPagesScanned';
|
|
293
298
|
h2.className = 'oobee-section-title';
|
|
294
|
-
h2.textContent =
|
|
299
|
+
h2.textContent = `Pages Scanned (${vars.urlsCrawled.scanned.length || 0})`;
|
|
300
|
+
const scanIcon = document.createElement('span');
|
|
301
|
+
scanIcon.className = 'oobee-btn-icon';
|
|
302
|
+
const SCAN_SVG = `
|
|
303
|
+
<svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 20 20" fill="none">
|
|
304
|
+
<g clip-path="url(#clip0_1421_431)">
|
|
305
|
+
<path d="M12.5763 11.5472L12.2958 11.2857L12.1037 11.1005C12.776 10.3183 12.9194 9.56432 12.9194 8.45969C12.9194 5.99657 10.9228 4 8.45969 4C5.99657 4 4 5.99657 4 8.45969C4 10.9228 5.99657 12.9194 8.45969 12.9194C9.56432 12.9194 10.3183 12.776 11.1005 12.1037L11.2857 12.2958L11.5472 12.5763L14.9777 16L16 14.9777L12.5763 11.5472ZM8.45969 11.5472C6.75129 11.5472 5.37221 10.1681 5.37221 8.45969C5.37221 6.75129 6.75129 5.37221 8.45969 5.37221C10.1681 5.37221 11.5472 6.75129 11.5472 8.45969C11.5472 10.1681 10.1681 11.5472 8.45969 11.5472Z" fill="white"/>
|
|
306
|
+
<path d="M18.5 0H19.5C19.7761 0 20 0.223858 20 0.5V5H18.5V0Z" fill="white"/>
|
|
307
|
+
<path d="M19.5 2.18552e-08L19.5 1.5L15 1.5L15 -2.18556e-07L19.5 2.18552e-08Z" fill="white"/>
|
|
308
|
+
<path d="M1.5 0H0.5C0.223858 0 0 0.223858 0 0.5V5H1.5V0Z" fill="white"/>
|
|
309
|
+
<path d="M0.5 2.18552e-08L0.5 1.5L5 1.5L5 -2.18556e-07L0.5 2.18552e-08Z" fill="white"/>
|
|
310
|
+
<path d="M1.5 20H0.5C0.223858 20 0 19.7761 0 19.5V15H1.5V20Z" fill="white"/>
|
|
311
|
+
<path d="M0.5 20L0.5 18.5L5 18.5L5 20L0.5 20Z" fill="white"/>
|
|
312
|
+
<path d="M18.5 20H19.5C19.7761 20 20 19.7761 20 19.5V15H18.5V20Z" fill="white"/>
|
|
313
|
+
<path d="M19.5 20L19.5 18.5L15 18.5L15 20L19.5 20Z" fill="white"/>
|
|
314
|
+
</g>
|
|
315
|
+
<defs>
|
|
316
|
+
<clipPath id="clip0_1421_431">
|
|
317
|
+
<rect width="20" height="20" fill="white"/>
|
|
318
|
+
</clipPath>
|
|
319
|
+
</defs>
|
|
320
|
+
</svg>
|
|
321
|
+
`;
|
|
322
|
+
scanIcon.innerHTML = SCAN_SVG;
|
|
295
323
|
const scanBtn = document.createElement('button');
|
|
296
324
|
scanBtn.id = 'oobeeBtnScan';
|
|
297
325
|
scanBtn.className = 'oobee-btn oobee-btn-primary';
|
|
298
|
-
scanBtn.innerText = 'Scan this page';
|
|
299
326
|
scanBtn.disabled = inProgress;
|
|
327
|
+
scanBtn.appendChild(scanIcon);
|
|
328
|
+
const scanText = document.createElement('span');
|
|
329
|
+
scanText.className = 'oobee-btn-text';
|
|
330
|
+
scanText.innerText = 'Scan page';
|
|
331
|
+
scanBtn.appendChild(scanText);
|
|
300
332
|
scanBtn.addEventListener('click', async () => customWindow.handleOnScanClick?.());
|
|
301
|
-
const
|
|
302
|
-
|
|
303
|
-
|
|
304
|
-
|
|
305
|
-
|
|
333
|
+
const endScanIcon = document.createElement('span');
|
|
334
|
+
endScanIcon.className = 'oobee-btn-icon';
|
|
335
|
+
const ENDSCAN_SVG = `<svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 20 20" fill="none">
|
|
336
|
+
<path d="M10 0C4.47 0 0 4.47 0 10C0 15.53 4.47 20 10 20C15.53 20 20 15.53 20 10C20 4.47 15.53 0 10 0ZM10 18C5.59 18 2 14.41 2 10C2 5.59 5.59 2 10 2C14.41 2 18 5.59 18 10C18 14.41 14.41 18 10 18ZM13.59 5L10 8.59L6.41 5L5 6.41L8.59 10L5 13.59L6.41 15L10 11.41L13.59 15L15 13.59L11.41 10L15 6.41L13.59 5Z" fill="#9021A6"/>
|
|
337
|
+
</svg>
|
|
338
|
+
`;
|
|
339
|
+
endScanIcon.innerHTML = ENDSCAN_SVG;
|
|
340
|
+
const endScanBtn = document.createElement('button');
|
|
341
|
+
endScanBtn.id = 'oobeeBtnEndScan';
|
|
342
|
+
endScanBtn.className = 'oobee-btn oobee-btn-secondary';
|
|
343
|
+
endScanBtn.appendChild(endScanIcon);
|
|
344
|
+
const endScanText = document.createElement('span');
|
|
345
|
+
endScanText.className = 'oobee-btn-text';
|
|
346
|
+
endScanText.innerText = 'End scan';
|
|
347
|
+
endScanBtn.appendChild(endScanText);
|
|
348
|
+
endScanBtn.addEventListener('click', async () => customWindow.handleOnStopClick?.());
|
|
306
349
|
const btnGroup = document.createElement('div');
|
|
307
350
|
btnGroup.className = 'oobee-actions';
|
|
308
351
|
btnGroup.appendChild(scanBtn);
|
|
309
|
-
btnGroup.appendChild(
|
|
352
|
+
btnGroup.appendChild(endScanBtn);
|
|
310
353
|
const listWrap = document.createElement('div');
|
|
311
354
|
listWrap.id = 'oobeeList';
|
|
312
355
|
listWrap.className = 'oobee-list';
|
|
@@ -370,7 +413,7 @@ export const addOverlayMenu = async (page, urlsCrawled, menuPos, opts = {
|
|
|
370
413
|
border-right: 1px solid rgba(0,0,0,.08)
|
|
371
414
|
}
|
|
372
415
|
.oobee-panel.collapsed {
|
|
373
|
-
width:
|
|
416
|
+
width: 58px;
|
|
374
417
|
overflow: hidden
|
|
375
418
|
}
|
|
376
419
|
|
|
@@ -447,6 +490,12 @@ export const addOverlayMenu = async (page, urlsCrawled, menuPos, opts = {
|
|
|
447
490
|
padding: 1rem;
|
|
448
491
|
}
|
|
449
492
|
|
|
493
|
+
.oobee-panel.collapsed .oobee-actions {
|
|
494
|
+
display: flex;
|
|
495
|
+
justify-content: center;
|
|
496
|
+
padding: 1rem 0.7rem;
|
|
497
|
+
}
|
|
498
|
+
|
|
450
499
|
/* Base button */
|
|
451
500
|
.oobee-btn {
|
|
452
501
|
width: 100%;
|
|
@@ -457,6 +506,10 @@ export const addOverlayMenu = async (page, urlsCrawled, menuPos, opts = {
|
|
|
457
506
|
line-height: 1.2;
|
|
458
507
|
font-weight: 400;
|
|
459
508
|
cursor: pointer;
|
|
509
|
+
display: flex;
|
|
510
|
+
align-items: center;
|
|
511
|
+
justify-content: center;
|
|
512
|
+
gap: 10px;
|
|
460
513
|
transition: {
|
|
461
514
|
box-shadow .12s ease,
|
|
462
515
|
transform .02s ease,
|
|
@@ -470,6 +523,19 @@ export const addOverlayMenu = async (page, urlsCrawled, menuPos, opts = {
|
|
|
470
523
|
cursor:not-allowed
|
|
471
524
|
}
|
|
472
525
|
|
|
526
|
+
.oobee-panel.collapsed .oobee-btn {
|
|
527
|
+
width: 44px !important;
|
|
528
|
+
height: 44px !important;
|
|
529
|
+
min-width: 44px !important;
|
|
530
|
+
min-height: 44px !important;
|
|
531
|
+
max-width: 44px !important;
|
|
532
|
+
max-height: 44px !important;
|
|
533
|
+
border-radius: 50% !important;
|
|
534
|
+
padding: 0 !important;
|
|
535
|
+
justify-content: center;
|
|
536
|
+
gap: 0;
|
|
537
|
+
}
|
|
538
|
+
|
|
473
539
|
/* Primary (filled) */
|
|
474
540
|
.oobee-btn-primary {
|
|
475
541
|
background: #9021a6;
|
|
@@ -525,6 +591,25 @@ export const addOverlayMenu = async (page, urlsCrawled, menuPos, opts = {
|
|
|
525
591
|
display: none;
|
|
526
592
|
}
|
|
527
593
|
|
|
594
|
+
.oobee-btn-icon {
|
|
595
|
+
display: inline-flex;
|
|
596
|
+
align-items: center;
|
|
597
|
+
justify-content: center;
|
|
598
|
+
width: 20px;
|
|
599
|
+
height: 20px;
|
|
600
|
+
vertical-align: middle;
|
|
601
|
+
}
|
|
602
|
+
|
|
603
|
+
.oobee-btn-text {
|
|
604
|
+
display: inline;
|
|
605
|
+
white-space: nowrap;
|
|
606
|
+
vertical-align: middle;
|
|
607
|
+
}
|
|
608
|
+
|
|
609
|
+
.oobee-panel.collapsed .oobee-btn-text {
|
|
610
|
+
display: none;
|
|
611
|
+
}
|
|
612
|
+
|
|
528
613
|
#oobeeStopOverlay[hidden] {
|
|
529
614
|
display:none !important;
|
|
530
615
|
}
|
|
@@ -542,7 +627,10 @@ export const addOverlayMenu = async (page, urlsCrawled, menuPos, opts = {
|
|
|
542
627
|
}
|
|
543
628
|
|
|
544
629
|
.oobee-panel.collapsed .oobee-section-title {
|
|
545
|
-
|
|
630
|
+
font-size: 14px;
|
|
631
|
+
display: flex;
|
|
632
|
+
justify-content: center;
|
|
633
|
+
text-align: center;
|
|
546
634
|
}
|
|
547
635
|
|
|
548
636
|
.oobee-ol {
|
|
@@ -921,7 +1009,14 @@ export const initNewPage = async (page, pageClosePromises, processPageParams, pa
|
|
|
921
1009
|
let menuPos = MENU_POSITION.right;
|
|
922
1010
|
// eslint-disable-next-line no-underscore-dangle
|
|
923
1011
|
const pageId = page._guid;
|
|
924
|
-
page.on('dialog', () => {
|
|
1012
|
+
page.on('dialog', async (dialog) => {
|
|
1013
|
+
try {
|
|
1014
|
+
await dialog.dismiss();
|
|
1015
|
+
}
|
|
1016
|
+
catch {
|
|
1017
|
+
// dialog may already be closed
|
|
1018
|
+
}
|
|
1019
|
+
});
|
|
925
1020
|
const pageClosePromise = new Promise(resolve => {
|
|
926
1021
|
page.on('close', () => {
|
|
927
1022
|
log(`Page: close detected: ${page.url()}`);
|
|
@@ -947,6 +1042,8 @@ export const initNewPage = async (page, pageClosePromises, processPageParams, pa
|
|
|
947
1042
|
await processPage(page, processPageParams);
|
|
948
1043
|
log('Scan: success');
|
|
949
1044
|
pagesDict[pageId].isScanning = false;
|
|
1045
|
+
if (page.isClosed())
|
|
1046
|
+
return;
|
|
950
1047
|
const allowed = isOverlayAllowed(page.url(), processPageParams.entryUrl);
|
|
951
1048
|
if (allowed) {
|
|
952
1049
|
await addOverlayMenu(page, processPageParams.urlsCrawled, menuPos, {
|
|
@@ -987,10 +1084,10 @@ export const initNewPage = async (page, pageClosePromises, processPageParams, pa
|
|
|
987
1084
|
});
|
|
988
1085
|
if (!inputValue?.confirmed) {
|
|
989
1086
|
await page.evaluate(() => {
|
|
990
|
-
const
|
|
991
|
-
if (
|
|
992
|
-
|
|
993
|
-
|
|
1087
|
+
const endScanBtn = document.getElementById('oobeeBtnEndScan');
|
|
1088
|
+
if (endScanBtn) {
|
|
1089
|
+
endScanBtn.disabled = false;
|
|
1090
|
+
endScanBtn.textContent = 'Stop';
|
|
994
1091
|
}
|
|
995
1092
|
});
|
|
996
1093
|
return;
|
|
@@ -1019,6 +1116,8 @@ export const initNewPage = async (page, pageClosePromises, processPageParams, pa
|
|
|
1019
1116
|
}
|
|
1020
1117
|
};
|
|
1021
1118
|
page.on('domcontentloaded', async () => {
|
|
1119
|
+
if (page.isClosed())
|
|
1120
|
+
return;
|
|
1022
1121
|
try {
|
|
1023
1122
|
const allowed = isOverlayAllowed(page.url(), processPageParams.entryUrl);
|
|
1024
1123
|
if (!allowed) {
|
|
@@ -1051,15 +1150,22 @@ export const initNewPage = async (page, pageClosePromises, processPageParams, pa
|
|
|
1051
1150
|
consoleLogger.info('Error in adding overlay menu to page');
|
|
1052
1151
|
}
|
|
1053
1152
|
});
|
|
1054
|
-
|
|
1055
|
-
|
|
1056
|
-
|
|
1057
|
-
|
|
1058
|
-
|
|
1059
|
-
|
|
1060
|
-
|
|
1061
|
-
|
|
1062
|
-
|
|
1063
|
-
|
|
1153
|
+
try {
|
|
1154
|
+
if (page.isClosed())
|
|
1155
|
+
return page;
|
|
1156
|
+
await page.exposeFunction('handleOnScanClick', handleOnScanClick);
|
|
1157
|
+
await page.exposeFunction('handleOnStopClick', handleOnStopClick);
|
|
1158
|
+
// Define the updateMenuPos function
|
|
1159
|
+
const updateMenuPos = newPos => {
|
|
1160
|
+
const prevPos = menuPos;
|
|
1161
|
+
if (prevPos !== newPos) {
|
|
1162
|
+
menuPos = newPos;
|
|
1163
|
+
}
|
|
1164
|
+
};
|
|
1165
|
+
await page.exposeFunction('updateMenuPos', updateMenuPos);
|
|
1166
|
+
}
|
|
1167
|
+
catch (e) {
|
|
1168
|
+
log(`Error exposing functions on page: ${e}`);
|
|
1169
|
+
}
|
|
1064
1170
|
return page;
|
|
1065
1171
|
};
|
|
@@ -2,15 +2,16 @@ const ALLOWED_PROTOCOLS = new Set(['http:', 'https:']);
|
|
|
2
2
|
export function addUrlGuardScript(context, opts = {}) {
|
|
3
3
|
const { fallbackUrl } = opts;
|
|
4
4
|
const lastAllowedUrlByPage = new WeakMap();
|
|
5
|
-
const attachGuardsToPage =
|
|
5
|
+
const attachGuardsToPage = page => {
|
|
6
6
|
if (!lastAllowedUrlByPage.has(page) && fallbackUrl) {
|
|
7
7
|
lastAllowedUrlByPage.set(page, String(fallbackUrl));
|
|
8
8
|
}
|
|
9
|
-
page
|
|
10
|
-
|
|
9
|
+
page
|
|
10
|
+
.addInitScript(() => {
|
|
11
|
+
const isAllowedProtocol = value => {
|
|
11
12
|
try {
|
|
12
13
|
const s = value instanceof URL ? value.toString() : String(value);
|
|
13
|
-
const protocol = new URL(s, window.location.href)
|
|
14
|
+
const { protocol } = new URL(s, window.location.href);
|
|
14
15
|
return protocol === 'http:' || protocol === 'https:';
|
|
15
16
|
}
|
|
16
17
|
catch {
|
|
@@ -24,17 +25,9 @@ export function addUrlGuardScript(context, opts = {}) {
|
|
|
24
25
|
return null;
|
|
25
26
|
return openOriginal.call(this, targetUrl, ...args);
|
|
26
27
|
};
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
assignOriginal(nextUrl); };
|
|
31
|
-
win.location.replace = (nextUrl) => { if (isAllowedProtocol(nextUrl))
|
|
32
|
-
replaceOriginal(nextUrl); };
|
|
33
|
-
Object.defineProperty(win.location, 'href', {
|
|
34
|
-
get() { return String(win.location.toString()); },
|
|
35
|
-
set(nextUrl) { if (isAllowedProtocol(nextUrl))
|
|
36
|
-
assignOriginal(nextUrl); },
|
|
37
|
-
});
|
|
28
|
+
})
|
|
29
|
+
.catch(() => {
|
|
30
|
+
// page may have closed before addInitScript completed; safe to ignore
|
|
38
31
|
});
|
|
39
32
|
const restoreToSafeUrl = async (page, attemptedUrl) => {
|
|
40
33
|
try {
|
|
@@ -1,12 +1,11 @@
|
|
|
1
1
|
/* eslint-env browser */
|
|
2
|
-
import { chromium } from 'playwright';
|
|
3
2
|
import { createCrawleeSubFolders } from './commonCrawlerFunc.js';
|
|
4
3
|
import { cleanUpAndExit, register, registerSoftClose } from '../utils.js';
|
|
5
4
|
import constants, { getIntermediateScreenshotsPath, guiInfoStatusTypes, } from '../constants/constants.js';
|
|
6
5
|
import { initNewPage, log } from './custom/utils.js';
|
|
7
6
|
import { guiInfoLog } from '../logs.js';
|
|
8
7
|
import { addUrlGuardScript } from './guards/urlGuard.js';
|
|
9
|
-
import { getPlaywrightLaunchOptions } from '../constants/common.js';
|
|
8
|
+
import { getBrowserToRun, getPlaywrightLaunchOptions, initModifiedUserAgent, } from '../constants/common.js';
|
|
10
9
|
// Export of classes
|
|
11
10
|
export class ProcessPageParams {
|
|
12
11
|
constructor(scannedIdx, blacklistedPatterns, includeScreenshots, dataset, intermediateScreenshotsPath, urlsCrawled, randomToken) {
|
|
@@ -19,7 +18,7 @@ export class ProcessPageParams {
|
|
|
19
18
|
this.randomToken = randomToken;
|
|
20
19
|
}
|
|
21
20
|
}
|
|
22
|
-
const runCustom = async (url, randomToken, viewportSettings, blacklistedPatterns, includeScreenshots, initialCustomFlowLabel) => {
|
|
21
|
+
const runCustom = async (url, randomToken, browserToRun, userDataDirectory, viewportSettings, blacklistedPatterns, includeScreenshots, initialCustomFlowLabel) => {
|
|
23
22
|
// checks and delete datasets path if it already exists
|
|
24
23
|
process.env.CRAWLEE_STORAGE_DIR = randomToken;
|
|
25
24
|
const urlsCrawled = { ...constants.urlsCrawledObj };
|
|
@@ -34,9 +33,13 @@ const runCustom = async (url, randomToken, viewportSettings, blacklistedPatterns
|
|
|
34
33
|
const pagesDict = {};
|
|
35
34
|
const pageClosePromises = [];
|
|
36
35
|
try {
|
|
36
|
+
const { browserToRun: resolvedBrowserToRun } = getBrowserToRun(randomToken, browserToRun, false);
|
|
37
37
|
const deviceConfig = viewportSettings.playwrightDeviceDetailsObject;
|
|
38
38
|
const hasCustomViewport = !!deviceConfig;
|
|
39
|
-
const
|
|
39
|
+
const rawDevice = (deviceConfig || {});
|
|
40
|
+
const { userAgent: deviceUserAgent, ...contextDeviceOptions } = rawDevice;
|
|
41
|
+
await initModifiedUserAgent(resolvedBrowserToRun, viewportSettings.playwrightDeviceDetailsObject);
|
|
42
|
+
const baseLaunchOptions = getPlaywrightLaunchOptions(resolvedBrowserToRun);
|
|
40
43
|
// Merge base args with custom flow specific args
|
|
41
44
|
const baseArgs = baseLaunchOptions.args || [];
|
|
42
45
|
const customArgs = hasCustomViewport ? ['--window-size=1920,1040'] : ['--start-maximized'];
|
|
@@ -44,33 +47,37 @@ const runCustom = async (url, randomToken, viewportSettings, blacklistedPatterns
|
|
|
44
47
|
...baseArgs.filter(a => !a.startsWith('--window-size') && a !== '--start-maximized'),
|
|
45
48
|
...customArgs,
|
|
46
49
|
];
|
|
47
|
-
const
|
|
50
|
+
const context = await constants.launcher.launchPersistentContext(userDataDirectory, {
|
|
48
51
|
...baseLaunchOptions,
|
|
49
52
|
args: mergedArgs,
|
|
50
53
|
headless: false,
|
|
51
|
-
});
|
|
52
|
-
const context = await browser.newContext({
|
|
53
54
|
ignoreHTTPSErrors: true,
|
|
54
55
|
serviceWorkers: 'block',
|
|
55
56
|
viewport: null,
|
|
56
|
-
...(hasCustomViewport ?
|
|
57
|
+
...(hasCustomViewport ? contextDeviceOptions : {}),
|
|
58
|
+
userAgent: process.env.OOBEE_USER_AGENT || deviceUserAgent,
|
|
57
59
|
});
|
|
58
60
|
register(context);
|
|
59
61
|
processPageParams.stopAll = async () => {
|
|
60
62
|
try {
|
|
61
63
|
await context.close().catch(() => { });
|
|
62
|
-
await browser.close().catch(() => { });
|
|
63
64
|
}
|
|
64
65
|
catch { }
|
|
65
66
|
};
|
|
66
67
|
// For handling closing playwright browser and continue generate artifacts etc
|
|
67
68
|
registerSoftClose(processPageParams.stopAll);
|
|
68
69
|
addUrlGuardScript(context, { fallbackUrl: url });
|
|
70
|
+
const page = context.pages().find(existingPage => !existingPage.isClosed()) || (await context.newPage());
|
|
71
|
+
await initNewPage(page, pageClosePromises, processPageParams, pagesDict);
|
|
69
72
|
// Detection of new page
|
|
70
73
|
context.on('page', async (newPage) => {
|
|
71
|
-
|
|
74
|
+
try {
|
|
75
|
+
await initNewPage(newPage, pageClosePromises, processPageParams, pagesDict);
|
|
76
|
+
}
|
|
77
|
+
catch (e) {
|
|
78
|
+
log(`Error initializing new page: ${e}`);
|
|
79
|
+
}
|
|
72
80
|
});
|
|
73
|
-
const page = await context.newPage();
|
|
74
81
|
await page.goto(url, { timeout: 0 });
|
|
75
82
|
// to execute and wait for all pages to close
|
|
76
83
|
// idea is for promise to be pending until page.on('close') detected
|