@govtechsg/oobee 0.10.85 → 0.10.86

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (38)
  1. package/.github/workflows/image.yml +3 -2
  2. package/.github/workflows/publish.yml +10 -0
  3. package/DETAILS.md +29 -0
  4. package/dist/combine.js +1 -1
  5. package/dist/constants/common.js +15 -4
  6. package/dist/constants/constants.js +604 -1
  7. package/dist/crawlers/commonCrawlerFunc.js +3 -2
  8. package/dist/crawlers/crawlSitemap.js +98 -80
  9. package/dist/crawlers/custom/utils.js +137 -31
  10. package/dist/crawlers/guards/urlGuard.js +8 -15
  11. package/dist/crawlers/runCustom.js +18 -11
  12. package/dist/generateOobeeClientScanner.js +570 -0
  13. package/dist/mergeAxeResults.js +5 -4
  14. package/dist/npmIndex.js +10 -2
  15. package/dist/proxyService.js +18 -3
  16. package/dist/services/s3Uploader.js +21 -10
  17. package/dist/static/ejs/partials/scripts/header/aboutScanModal/ScanConfiguration.ejs +2 -2
  18. package/dist/static/ejs/partials/scripts/ruleModal/constants.ejs +1 -761
  19. package/dist/static/ejs/summary.ejs +10 -5
  20. package/oobee-client-scanner.js +34992 -0
  21. package/package.json +2 -2
  22. package/src/combine.ts +3 -1
  23. package/src/constants/common.ts +22 -10
  24. package/src/constants/constants.ts +602 -1
  25. package/src/crawlers/commonCrawlerFunc.ts +4 -3
  26. package/src/crawlers/crawlSitemap.ts +116 -98
  27. package/src/crawlers/custom/utils.ts +143 -38
  28. package/src/crawlers/guards/urlGuard.ts +24 -31
  29. package/src/crawlers/runCustom.ts +29 -11
  30. package/src/generateOobeeClientScanner.ts +591 -0
  31. package/src/mergeAxeResults.ts +5 -3
  32. package/src/npmIndex.ts +12 -2
  33. package/src/proxyService.ts +25 -4
  34. package/src/services/s3Uploader.ts +23 -11
  35. package/src/static/ejs/partials/scripts/header/aboutScanModal/ScanConfiguration.ejs +2 -2
  36. package/src/static/ejs/partials/scripts/ruleModal/constants.ejs +1 -761
  37. package/src/static/ejs/summary.ejs +10 -5
  38. package/testStaticJSScanner.html +534 -0
@@ -116,7 +116,7 @@ export const filterAxeResults = (results, pageTitle, customFlowDetails) => {
116
116
  return;
117
117
  const conformance = tags.filter(tag => tag.startsWith('wcag') || tag === 'best-practice');
118
118
  nodes.forEach(node => {
119
- const { html } = node;
119
+ const { html, target } = node;
120
120
  if (!(rule in passed.rules)) {
121
121
  passed.rules[rule] = {
122
122
  description,
@@ -128,7 +128,8 @@ export const filterAxeResults = (results, pageTitle, customFlowDetails) => {
128
128
  };
129
129
  }
130
130
  const finalHtml = truncateHtml(html);
131
- passed.rules[rule].items.push({ html: finalHtml, screenshotPath: '', message: '', xpath: '' });
131
+ const xpath = target.length === 1 && typeof target[0] === 'string' ? target[0] : undefined;
132
+ passed.rules[rule].items.push({ html: finalHtml, screenshotPath: '', message: '', xpath: xpath || '' });
132
133
  passed.totalItems += 1;
133
134
  passed.rules[rule].totalItems += 1;
134
135
  totalItems += 1;
@@ -12,6 +12,7 @@ const crawlSitemap = async ({ sitemapUrl, randomToken, host, viewportSettings, m
12
12
  let dataset;
13
13
  let urlsCrawled;
14
14
  let durationExceeded = false;
15
+ let isAbortingScan = false;
15
16
  if (fromCrawlIntelligentSitemap) {
16
17
  dataset = datasetFromIntelligent;
17
18
  urlsCrawled = urlsCrawledFromIntelligent;
@@ -144,106 +145,123 @@ const crawlSitemap = async ({ sitemapUrl, randomToken, host, viewportSettings, m
144
145
  });
145
146
  return;
146
147
  }
147
- await waitForPageLoaded(page, 10000);
148
- const actualUrl = page.url() || request.loadedUrl || request.url;
149
- const hasExceededDuration = scanDuration > 0 && Date.now() - crawlStartTime > scanDuration * 1000;
150
- if (urlsCrawled.scanned.length >= maxRequestsPerCrawl || hasExceededDuration) {
151
- if (hasExceededDuration) {
152
- console.log(`Crawl duration of ${scanDuration}s exceeded. Aborting sitemap crawl.`);
153
- durationExceeded = true;
154
- }
155
- crawler.autoscaledPool.abort(); // stops new requests
156
- return;
157
- }
158
- if (request.skipNavigation && actualUrl === 'about:blank') {
159
- if (isScanPdfs) {
160
- // pushes download promise into pdfDownloads
161
- const { pdfFileName, url } = handlePdfDownload(randomToken, pdfDownloads, request, sendRequest, urlsCrawled);
162
- uuidToPdfMapping[pdfFileName] = url;
148
+ try {
149
+ await waitForPageLoaded(page, 10000);
150
+ const actualUrl = page.url() || request.loadedUrl || request.url;
151
+ const hasExceededDuration = scanDuration > 0 && Date.now() - crawlStartTime > scanDuration * 1000;
152
+ if (urlsCrawled.scanned.length >= maxRequestsPerCrawl || hasExceededDuration) {
153
+ isAbortingScan = true;
154
+ if (hasExceededDuration) {
155
+ console.log(`Crawl duration of ${scanDuration}s exceeded. Aborting sitemap crawl.`);
156
+ durationExceeded = true;
157
+ }
158
+ crawler.autoscaledPool.abort(); // stops new requests
163
159
  return;
164
160
  }
165
- guiInfoLog(guiInfoStatusTypes.SKIPPED, {
166
- numScanned: urlsCrawled.scanned.length,
167
- urlScanned: request.url,
168
- });
169
- urlsCrawled.userExcluded.push({
170
- url: request.url,
171
- pageTitle: request.url,
172
- actualUrl: request.url, // because about:blank is not useful
173
- metadata: STATUS_CODE_METADATA[1],
174
- httpStatusCode: 1,
175
- });
176
- return;
177
- }
178
- const contentType = response?.headers?.()['content-type'] || '';
179
- const status = response ? response.status() : 0;
180
- if (isScanHtml && status < 300 && isWhitelistedContentType(contentType)) {
181
- const isRedirected = !areLinksEqual(page.url(), request.url);
182
- const isLoadedUrlInCrawledUrls = urlsCrawled.scanned.some(item => (item.actualUrl || item.url) === page.url());
183
- if (isRedirected && isLoadedUrlInCrawledUrls) {
184
- urlsCrawled.notScannedRedirects.push({
185
- fromUrl: request.url,
186
- toUrl: actualUrl, // i.e. actualUrl
161
+ if (request.skipNavigation && actualUrl === 'about:blank') {
162
+ if (isScanPdfs) {
163
+ // pushes download promise into pdfDownloads
164
+ const { pdfFileName, url } = handlePdfDownload(randomToken, pdfDownloads, request, sendRequest, urlsCrawled);
165
+ uuidToPdfMapping[pdfFileName] = url;
166
+ return;
167
+ }
168
+ guiInfoLog(guiInfoStatusTypes.SKIPPED, {
169
+ numScanned: urlsCrawled.scanned.length,
170
+ urlScanned: request.url,
187
171
  });
188
- return;
189
- }
190
- // This logic is different from crawlDomain, as it also checks if the page is redirected before checking if it is excluded using exclusions.txt
191
- if (isRedirected && blacklistedPatterns && isSkippedUrl(actualUrl, blacklistedPatterns)) {
192
172
  urlsCrawled.userExcluded.push({
193
173
  url: request.url,
194
174
  pageTitle: request.url,
195
- actualUrl,
196
- metadata: STATUS_CODE_METADATA[0],
197
- httpStatusCode: 0,
175
+ actualUrl: request.url, // because about:blank is not useful
176
+ metadata: STATUS_CODE_METADATA[1],
177
+ httpStatusCode: 1,
198
178
  });
179
+ return;
180
+ }
181
+ const contentType = response?.headers?.()['content-type'] || '';
182
+ const status = response ? response.status() : 0;
183
+ if (isScanHtml && status < 300 && isWhitelistedContentType(contentType)) {
184
+ const isRedirected = !areLinksEqual(page.url(), request.url);
185
+ const isLoadedUrlInCrawledUrls = urlsCrawled.scanned.some(item => (item.actualUrl || item.url) === page.url());
186
+ if (isRedirected && isLoadedUrlInCrawledUrls) {
187
+ urlsCrawled.notScannedRedirects.push({
188
+ fromUrl: request.url,
189
+ toUrl: actualUrl, // i.e. actualUrl
190
+ });
191
+ return;
192
+ }
193
+ // This logic is different from crawlDomain, as it also checks if the page is redirected before checking if it is excluded using exclusions.txt
194
+ if (isRedirected && blacklistedPatterns && isSkippedUrl(actualUrl, blacklistedPatterns)) {
195
+ urlsCrawled.userExcluded.push({
196
+ url: request.url,
197
+ pageTitle: request.url,
198
+ actualUrl,
199
+ metadata: STATUS_CODE_METADATA[0],
200
+ httpStatusCode: 0,
201
+ });
202
+ guiInfoLog(guiInfoStatusTypes.SKIPPED, {
203
+ numScanned: urlsCrawled.scanned.length,
204
+ urlScanned: request.url,
205
+ });
206
+ return;
207
+ }
208
+ const results = await runAxeScript({ includeScreenshots, page, randomToken });
209
+ guiInfoLog(guiInfoStatusTypes.SCANNED, {
210
+ numScanned: urlsCrawled.scanned.length,
211
+ urlScanned: request.url,
212
+ });
213
+ urlsCrawled.scanned.push({
214
+ url: request.url,
215
+ pageTitle: results.pageTitle,
216
+ actualUrl, // i.e. actualUrl
217
+ });
218
+ urlsCrawled.scannedRedirects.push({
219
+ fromUrl: request.url,
220
+ toUrl: actualUrl,
221
+ });
222
+ results.url = request.url;
223
+ results.actualUrl = actualUrl;
224
+ await dataset.pushData(results);
225
+ }
226
+ else {
199
227
  guiInfoLog(guiInfoStatusTypes.SKIPPED, {
200
228
  numScanned: urlsCrawled.scanned.length,
201
229
  urlScanned: request.url,
202
230
  });
203
- return;
231
+ if (isScanHtml) {
232
+ // carry through the HTTP status metadata
233
+ const status = response?.status();
234
+ const metadata = typeof status === 'number'
235
+ ? STATUS_CODE_METADATA[status] || STATUS_CODE_METADATA[599]
236
+ : STATUS_CODE_METADATA[2];
237
+ urlsCrawled.invalid.push({
238
+ actualUrl,
239
+ url: request.url,
240
+ pageTitle: request.url,
241
+ metadata,
242
+ httpStatusCode: typeof status === 'number' ? status : 0,
243
+ });
244
+ }
204
245
  }
205
- const results = await runAxeScript({ includeScreenshots, page, randomToken });
206
- guiInfoLog(guiInfoStatusTypes.SCANNED, {
207
- numScanned: urlsCrawled.scanned.length,
208
- urlScanned: request.url,
209
- });
210
- urlsCrawled.scanned.push({
211
- url: request.url,
212
- pageTitle: results.pageTitle,
213
- actualUrl, // i.e. actualUrl
214
- });
215
- urlsCrawled.scannedRedirects.push({
216
- fromUrl: request.url,
217
- toUrl: actualUrl,
218
- });
219
- results.url = request.url;
220
- results.actualUrl = actualUrl;
221
- await dataset.pushData(results);
222
246
  }
223
- else {
224
- guiInfoLog(guiInfoStatusTypes.SKIPPED, {
225
- numScanned: urlsCrawled.scanned.length,
226
- urlScanned: request.url,
227
- });
228
- if (isScanHtml) {
229
- // carry through the HTTP status metadata
230
- const status = response?.status();
231
- const metadata = typeof status === 'number'
232
- ? STATUS_CODE_METADATA[status] || STATUS_CODE_METADATA[599]
233
- : STATUS_CODE_METADATA[2];
234
- urlsCrawled.invalid.push({
235
- actualUrl,
247
+ catch (e) {
248
+ if (!isAbortingScan) {
249
+ guiInfoLog(guiInfoStatusTypes.ERROR, {
250
+ numScanned: urlsCrawled.scanned.length,
251
+ urlScanned: request.url,
252
+ });
253
+ urlsCrawled.error.push({
236
254
  url: request.url,
237
255
  pageTitle: request.url,
238
- metadata,
239
- httpStatusCode: typeof status === 'number' ? status : 0,
256
+ actualUrl: request.url,
257
+ metadata: STATUS_CODE_METADATA[2],
258
+ httpStatusCode: 0,
240
259
  });
241
260
  }
242
261
  }
243
262
  },
244
263
  failedRequestHandler: async ({ request, response, error }) => {
245
- // check if scanned pages have reached limit due to multi-instances of handler running
246
- if (urlsCrawled.scanned.length >= maxRequestsPerCrawl) {
264
+ if (isAbortingScan) {
247
265
  return;
248
266
  }
249
267
  guiInfoLog(guiInfoStatusTypes.ERROR, {
@@ -62,14 +62,19 @@ export const screenshotFullPage = async (page, screenshotsDir, screenshotIdx) =>
62
62
  await page.evaluate(() => {
63
63
  window.scrollTo(0, document.body.scrollHeight);
64
64
  });
65
- const isLoadMoreContent = async () => new Promise(resolve => {
66
- setTimeout(async () => {
65
+ const isLoadMoreContent = async () => {
66
+ await new Promise(resolve => setTimeout(resolve, 2500));
67
+ if (page.isClosed())
68
+ return false;
69
+ try {
67
70
  await page.waitForLoadState('domcontentloaded');
68
71
  const newHeight = await page.evaluate(() => document.body.scrollHeight);
69
- const result = newHeight > prevHeight;
70
- resolve(result);
71
- }, 2500);
72
- });
72
+ return newHeight > prevHeight;
73
+ }
74
+ catch {
75
+ return false;
76
+ }
77
+ };
73
78
  const result = await isLoadMoreContent();
74
79
  return result;
75
80
  };
@@ -291,22 +296,60 @@ export const addOverlayMenu = async (page, urlsCrawled, menuPos, opts = {
291
296
  const h2 = document.createElement('h2');
292
297
  h2.id = 'oobeeHPagesScanned';
293
298
  h2.className = 'oobee-section-title';
294
- h2.textContent = 'Pages Scanned';
299
+ h2.textContent = `Pages Scanned (${vars.urlsCrawled.scanned.length || 0})`;
300
+ const scanIcon = document.createElement('span');
301
+ scanIcon.className = 'oobee-btn-icon';
302
+ const SCAN_SVG = `
303
+ <svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 20 20" fill="none">
304
+ <g clip-path="url(#clip0_1421_431)">
305
+ <path d="M12.5763 11.5472L12.2958 11.2857L12.1037 11.1005C12.776 10.3183 12.9194 9.56432 12.9194 8.45969C12.9194 5.99657 10.9228 4 8.45969 4C5.99657 4 4 5.99657 4 8.45969C4 10.9228 5.99657 12.9194 8.45969 12.9194C9.56432 12.9194 10.3183 12.776 11.1005 12.1037L11.2857 12.2958L11.5472 12.5763L14.9777 16L16 14.9777L12.5763 11.5472ZM8.45969 11.5472C6.75129 11.5472 5.37221 10.1681 5.37221 8.45969C5.37221 6.75129 6.75129 5.37221 8.45969 5.37221C10.1681 5.37221 11.5472 6.75129 11.5472 8.45969C11.5472 10.1681 10.1681 11.5472 8.45969 11.5472Z" fill="white"/>
306
+ <path d="M18.5 0H19.5C19.7761 0 20 0.223858 20 0.5V5H18.5V0Z" fill="white"/>
307
+ <path d="M19.5 2.18552e-08L19.5 1.5L15 1.5L15 -2.18556e-07L19.5 2.18552e-08Z" fill="white"/>
308
+ <path d="M1.5 0H0.5C0.223858 0 0 0.223858 0 0.5V5H1.5V0Z" fill="white"/>
309
+ <path d="M0.5 2.18552e-08L0.5 1.5L5 1.5L5 -2.18556e-07L0.5 2.18552e-08Z" fill="white"/>
310
+ <path d="M1.5 20H0.5C0.223858 20 0 19.7761 0 19.5V15H1.5V20Z" fill="white"/>
311
+ <path d="M0.5 20L0.5 18.5L5 18.5L5 20L0.5 20Z" fill="white"/>
312
+ <path d="M18.5 20H19.5C19.7761 20 20 19.7761 20 19.5V15H18.5V20Z" fill="white"/>
313
+ <path d="M19.5 20L19.5 18.5L15 18.5L15 20L19.5 20Z" fill="white"/>
314
+ </g>
315
+ <defs>
316
+ <clipPath id="clip0_1421_431">
317
+ <rect width="20" height="20" fill="white"/>
318
+ </clipPath>
319
+ </defs>
320
+ </svg>
321
+ `;
322
+ scanIcon.innerHTML = SCAN_SVG;
295
323
  const scanBtn = document.createElement('button');
296
324
  scanBtn.id = 'oobeeBtnScan';
297
325
  scanBtn.className = 'oobee-btn oobee-btn-primary';
298
- scanBtn.innerText = 'Scan this page';
299
326
  scanBtn.disabled = inProgress;
327
+ scanBtn.appendChild(scanIcon);
328
+ const scanText = document.createElement('span');
329
+ scanText.className = 'oobee-btn-text';
330
+ scanText.innerText = 'Scan page';
331
+ scanBtn.appendChild(scanText);
300
332
  scanBtn.addEventListener('click', async () => customWindow.handleOnScanClick?.());
301
- const stopBtn = document.createElement('button');
302
- stopBtn.id = 'oobeeBtnStop';
303
- stopBtn.className = 'oobee-btn oobee-btn-secondary';
304
- stopBtn.innerText = 'Stop scan';
305
- stopBtn.addEventListener('click', async () => customWindow.handleOnStopClick?.());
333
+ const endScanIcon = document.createElement('span');
334
+ endScanIcon.className = 'oobee-btn-icon';
335
+ const ENDSCAN_SVG = `<svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 20 20" fill="none">
336
+ <path d="M10 0C4.47 0 0 4.47 0 10C0 15.53 4.47 20 10 20C15.53 20 20 15.53 20 10C20 4.47 15.53 0 10 0ZM10 18C5.59 18 2 14.41 2 10C2 5.59 5.59 2 10 2C14.41 2 18 5.59 18 10C18 14.41 14.41 18 10 18ZM13.59 5L10 8.59L6.41 5L5 6.41L8.59 10L5 13.59L6.41 15L10 11.41L13.59 15L15 13.59L11.41 10L15 6.41L13.59 5Z" fill="#9021A6"/>
337
+ </svg>
338
+ `;
339
+ endScanIcon.innerHTML = ENDSCAN_SVG;
340
+ const endScanBtn = document.createElement('button');
341
+ endScanBtn.id = 'oobeeBtnEndScan';
342
+ endScanBtn.className = 'oobee-btn oobee-btn-secondary';
343
+ endScanBtn.appendChild(endScanIcon);
344
+ const endScanText = document.createElement('span');
345
+ endScanText.className = 'oobee-btn-text';
346
+ endScanText.innerText = 'End scan';
347
+ endScanBtn.appendChild(endScanText);
348
+ endScanBtn.addEventListener('click', async () => customWindow.handleOnStopClick?.());
306
349
  const btnGroup = document.createElement('div');
307
350
  btnGroup.className = 'oobee-actions';
308
351
  btnGroup.appendChild(scanBtn);
309
- btnGroup.appendChild(stopBtn);
352
+ btnGroup.appendChild(endScanBtn);
310
353
  const listWrap = document.createElement('div');
311
354
  listWrap.id = 'oobeeList';
312
355
  listWrap.className = 'oobee-list';
@@ -370,7 +413,7 @@ export const addOverlayMenu = async (page, urlsCrawled, menuPos, opts = {
370
413
  border-right: 1px solid rgba(0,0,0,.08)
371
414
  }
372
415
  .oobee-panel.collapsed {
373
- width: 56px;
416
+ width: 58px;
374
417
  overflow: hidden
375
418
  }
376
419
 
@@ -447,6 +490,12 @@ export const addOverlayMenu = async (page, urlsCrawled, menuPos, opts = {
447
490
  padding: 1rem;
448
491
  }
449
492
 
493
+ .oobee-panel.collapsed .oobee-actions {
494
+ display: flex;
495
+ justify-content: center;
496
+ padding: 1rem 0.7rem;
497
+ }
498
+
450
499
  /* Base button */
451
500
  .oobee-btn {
452
501
  width: 100%;
@@ -457,6 +506,10 @@ export const addOverlayMenu = async (page, urlsCrawled, menuPos, opts = {
457
506
  line-height: 1.2;
458
507
  font-weight: 400;
459
508
  cursor: pointer;
509
+ display: flex;
510
+ align-items: center;
511
+ justify-content: center;
512
+ gap: 10px;
460
513
  transition: {
461
514
  box-shadow .12s ease,
462
515
  transform .02s ease,
@@ -470,6 +523,19 @@ export const addOverlayMenu = async (page, urlsCrawled, menuPos, opts = {
470
523
  cursor:not-allowed
471
524
  }
472
525
 
526
+ .oobee-panel.collapsed .oobee-btn {
527
+ width: 44px !important;
528
+ height: 44px !important;
529
+ min-width: 44px !important;
530
+ min-height: 44px !important;
531
+ max-width: 44px !important;
532
+ max-height: 44px !important;
533
+ border-radius: 50% !important;
534
+ padding: 0 !important;
535
+ justify-content: center;
536
+ gap: 0;
537
+ }
538
+
473
539
  /* Primary (filled) */
474
540
  .oobee-btn-primary {
475
541
  background: #9021a6;
@@ -525,6 +591,25 @@ export const addOverlayMenu = async (page, urlsCrawled, menuPos, opts = {
525
591
  display: none;
526
592
  }
527
593
 
594
+ .oobee-btn-icon {
595
+ display: inline-flex;
596
+ align-items: center;
597
+ justify-content: center;
598
+ width: 20px;
599
+ height: 20px;
600
+ vertical-align: middle;
601
+ }
602
+
603
+ .oobee-btn-text {
604
+ display: inline;
605
+ white-space: nowrap;
606
+ vertical-align: middle;
607
+ }
608
+
609
+ .oobee-panel.collapsed .oobee-btn-text {
610
+ display: none;
611
+ }
612
+
528
613
  #oobeeStopOverlay[hidden] {
529
614
  display:none !important;
530
615
  }
@@ -542,7 +627,10 @@ export const addOverlayMenu = async (page, urlsCrawled, menuPos, opts = {
542
627
  }
543
628
 
544
629
  .oobee-panel.collapsed .oobee-section-title {
545
- display: none;
630
+ font-size: 14px;
631
+ display: flex;
632
+ justify-content: center;
633
+ text-align: center;
546
634
  }
547
635
 
548
636
  .oobee-ol {
@@ -921,7 +1009,14 @@ export const initNewPage = async (page, pageClosePromises, processPageParams, pa
921
1009
  let menuPos = MENU_POSITION.right;
922
1010
  // eslint-disable-next-line no-underscore-dangle
923
1011
  const pageId = page._guid;
924
- page.on('dialog', () => { });
1012
+ page.on('dialog', async (dialog) => {
1013
+ try {
1014
+ await dialog.dismiss();
1015
+ }
1016
+ catch {
1017
+ // dialog may already be closed
1018
+ }
1019
+ });
925
1020
  const pageClosePromise = new Promise(resolve => {
926
1021
  page.on('close', () => {
927
1022
  log(`Page: close detected: ${page.url()}`);
@@ -947,6 +1042,8 @@ export const initNewPage = async (page, pageClosePromises, processPageParams, pa
947
1042
  await processPage(page, processPageParams);
948
1043
  log('Scan: success');
949
1044
  pagesDict[pageId].isScanning = false;
1045
+ if (page.isClosed())
1046
+ return;
950
1047
  const allowed = isOverlayAllowed(page.url(), processPageParams.entryUrl);
951
1048
  if (allowed) {
952
1049
  await addOverlayMenu(page, processPageParams.urlsCrawled, menuPos, {
@@ -987,10 +1084,10 @@ export const initNewPage = async (page, pageClosePromises, processPageParams, pa
987
1084
  });
988
1085
  if (!inputValue?.confirmed) {
989
1086
  await page.evaluate(() => {
990
- const stopBtn = document.getElementById('oobeeBtnStop');
991
- if (stopBtn) {
992
- stopBtn.disabled = false;
993
- stopBtn.textContent = 'Stop';
1087
+ const endScanBtn = document.getElementById('oobeeBtnEndScan');
1088
+ if (endScanBtn) {
1089
+ endScanBtn.disabled = false;
1090
+ endScanBtn.textContent = 'Stop';
994
1091
  }
995
1092
  });
996
1093
  return;
@@ -1019,6 +1116,8 @@ export const initNewPage = async (page, pageClosePromises, processPageParams, pa
1019
1116
  }
1020
1117
  };
1021
1118
  page.on('domcontentloaded', async () => {
1119
+ if (page.isClosed())
1120
+ return;
1022
1121
  try {
1023
1122
  const allowed = isOverlayAllowed(page.url(), processPageParams.entryUrl);
1024
1123
  if (!allowed) {
@@ -1051,15 +1150,22 @@ export const initNewPage = async (page, pageClosePromises, processPageParams, pa
1051
1150
  consoleLogger.info('Error in adding overlay menu to page');
1052
1151
  }
1053
1152
  });
1054
- await page.exposeFunction('handleOnScanClick', handleOnScanClick);
1055
- await page.exposeFunction('handleOnStopClick', handleOnStopClick);
1056
- // Define the updateMenuPos function
1057
- const updateMenuPos = newPos => {
1058
- const prevPos = menuPos;
1059
- if (prevPos !== newPos) {
1060
- menuPos = newPos;
1061
- }
1062
- };
1063
- await page.exposeFunction('updateMenuPos', updateMenuPos);
1153
+ try {
1154
+ if (page.isClosed())
1155
+ return page;
1156
+ await page.exposeFunction('handleOnScanClick', handleOnScanClick);
1157
+ await page.exposeFunction('handleOnStopClick', handleOnStopClick);
1158
+ // Define the updateMenuPos function
1159
+ const updateMenuPos = newPos => {
1160
+ const prevPos = menuPos;
1161
+ if (prevPos !== newPos) {
1162
+ menuPos = newPos;
1163
+ }
1164
+ };
1165
+ await page.exposeFunction('updateMenuPos', updateMenuPos);
1166
+ }
1167
+ catch (e) {
1168
+ log(`Error exposing functions on page: ${e}`);
1169
+ }
1064
1170
  return page;
1065
1171
  };
@@ -2,15 +2,16 @@ const ALLOWED_PROTOCOLS = new Set(['http:', 'https:']);
2
2
  export function addUrlGuardScript(context, opts = {}) {
3
3
  const { fallbackUrl } = opts;
4
4
  const lastAllowedUrlByPage = new WeakMap();
5
- const attachGuardsToPage = (page) => {
5
+ const attachGuardsToPage = page => {
6
6
  if (!lastAllowedUrlByPage.has(page) && fallbackUrl) {
7
7
  lastAllowedUrlByPage.set(page, String(fallbackUrl));
8
8
  }
9
- page.addInitScript(() => {
10
- const isAllowedProtocol = (value) => {
9
+ page
10
+ .addInitScript(() => {
11
+ const isAllowedProtocol = value => {
11
12
  try {
12
13
  const s = value instanceof URL ? value.toString() : String(value);
13
- const protocol = new URL(s, window.location.href).protocol;
14
+ const { protocol } = new URL(s, window.location.href);
14
15
  return protocol === 'http:' || protocol === 'https:';
15
16
  }
16
17
  catch {
@@ -24,17 +25,9 @@ export function addUrlGuardScript(context, opts = {}) {
24
25
  return null;
25
26
  return openOriginal.call(this, targetUrl, ...args);
26
27
  };
27
- const assignOriginal = win.location.assign.bind(win.location);
28
- const replaceOriginal = win.location.replace.bind(win.location);
29
- win.location.assign = (nextUrl) => { if (isAllowedProtocol(nextUrl))
30
- assignOriginal(nextUrl); };
31
- win.location.replace = (nextUrl) => { if (isAllowedProtocol(nextUrl))
32
- replaceOriginal(nextUrl); };
33
- Object.defineProperty(win.location, 'href', {
34
- get() { return String(win.location.toString()); },
35
- set(nextUrl) { if (isAllowedProtocol(nextUrl))
36
- assignOriginal(nextUrl); },
37
- });
28
+ })
29
+ .catch(() => {
30
+ // page may have closed before addInitScript completed; safe to ignore
38
31
  });
39
32
  const restoreToSafeUrl = async (page, attemptedUrl) => {
40
33
  try {
@@ -1,12 +1,11 @@
1
1
  /* eslint-env browser */
2
- import { chromium } from 'playwright';
3
2
  import { createCrawleeSubFolders } from './commonCrawlerFunc.js';
4
3
  import { cleanUpAndExit, register, registerSoftClose } from '../utils.js';
5
4
  import constants, { getIntermediateScreenshotsPath, guiInfoStatusTypes, } from '../constants/constants.js';
6
5
  import { initNewPage, log } from './custom/utils.js';
7
6
  import { guiInfoLog } from '../logs.js';
8
7
  import { addUrlGuardScript } from './guards/urlGuard.js';
9
- import { getPlaywrightLaunchOptions } from '../constants/common.js';
8
+ import { getBrowserToRun, getPlaywrightLaunchOptions, initModifiedUserAgent, } from '../constants/common.js';
10
9
  // Export of classes
11
10
  export class ProcessPageParams {
12
11
  constructor(scannedIdx, blacklistedPatterns, includeScreenshots, dataset, intermediateScreenshotsPath, urlsCrawled, randomToken) {
@@ -19,7 +18,7 @@ export class ProcessPageParams {
19
18
  this.randomToken = randomToken;
20
19
  }
21
20
  }
22
- const runCustom = async (url, randomToken, viewportSettings, blacklistedPatterns, includeScreenshots, initialCustomFlowLabel) => {
21
+ const runCustom = async (url, randomToken, browserToRun, userDataDirectory, viewportSettings, blacklistedPatterns, includeScreenshots, initialCustomFlowLabel) => {
23
22
  // checks and delete datasets path if it already exists
24
23
  process.env.CRAWLEE_STORAGE_DIR = randomToken;
25
24
  const urlsCrawled = { ...constants.urlsCrawledObj };
@@ -34,9 +33,13 @@ const runCustom = async (url, randomToken, viewportSettings, blacklistedPatterns
34
33
  const pagesDict = {};
35
34
  const pageClosePromises = [];
36
35
  try {
36
+ const { browserToRun: resolvedBrowserToRun } = getBrowserToRun(randomToken, browserToRun, false);
37
37
  const deviceConfig = viewportSettings.playwrightDeviceDetailsObject;
38
38
  const hasCustomViewport = !!deviceConfig;
39
- const baseLaunchOptions = getPlaywrightLaunchOptions();
39
+ const rawDevice = (deviceConfig || {});
40
+ const { userAgent: deviceUserAgent, ...contextDeviceOptions } = rawDevice;
41
+ await initModifiedUserAgent(resolvedBrowserToRun, viewportSettings.playwrightDeviceDetailsObject);
42
+ const baseLaunchOptions = getPlaywrightLaunchOptions(resolvedBrowserToRun);
40
43
  // Merge base args with custom flow specific args
41
44
  const baseArgs = baseLaunchOptions.args || [];
42
45
  const customArgs = hasCustomViewport ? ['--window-size=1920,1040'] : ['--start-maximized'];
@@ -44,33 +47,37 @@ const runCustom = async (url, randomToken, viewportSettings, blacklistedPatterns
44
47
  ...baseArgs.filter(a => !a.startsWith('--window-size') && a !== '--start-maximized'),
45
48
  ...customArgs,
46
49
  ];
47
- const browser = await chromium.launch({
50
+ const context = await constants.launcher.launchPersistentContext(userDataDirectory, {
48
51
  ...baseLaunchOptions,
49
52
  args: mergedArgs,
50
53
  headless: false,
51
- });
52
- const context = await browser.newContext({
53
54
  ignoreHTTPSErrors: true,
54
55
  serviceWorkers: 'block',
55
56
  viewport: null,
56
- ...(hasCustomViewport ? deviceConfig : {}),
57
+ ...(hasCustomViewport ? contextDeviceOptions : {}),
58
+ userAgent: process.env.OOBEE_USER_AGENT || deviceUserAgent,
57
59
  });
58
60
  register(context);
59
61
  processPageParams.stopAll = async () => {
60
62
  try {
61
63
  await context.close().catch(() => { });
62
- await browser.close().catch(() => { });
63
64
  }
64
65
  catch { }
65
66
  };
66
67
  // For handling closing playwright browser and continue generate artifacts etc
67
68
  registerSoftClose(processPageParams.stopAll);
68
69
  addUrlGuardScript(context, { fallbackUrl: url });
70
+ const page = context.pages().find(existingPage => !existingPage.isClosed()) || (await context.newPage());
71
+ await initNewPage(page, pageClosePromises, processPageParams, pagesDict);
69
72
  // Detection of new page
70
73
  context.on('page', async (newPage) => {
71
- await initNewPage(newPage, pageClosePromises, processPageParams, pagesDict);
74
+ try {
75
+ await initNewPage(newPage, pageClosePromises, processPageParams, pagesDict);
76
+ }
77
+ catch (e) {
78
+ log(`Error initializing new page: ${e}`);
79
+ }
72
80
  });
73
- const page = await context.newPage();
74
81
  await page.goto(url, { timeout: 0 });
75
82
  // to execute and wait for all pages to close
76
83
  // idea is for promise to be pending until page.on('close') detected