@govtechsg/oobee 0.10.69 → 0.10.70

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -571,7 +571,7 @@ The following URL and file validation error codes are provided to troubleshoot t
571
571
  | 11 | invalidUrl | Invalid URL. Please check and try again. | • Ensure the URL starts with `http://` or `https://`.<br>• Check for typos in the URL. |
572
572
  | 12 | cannotBeResolved | URL cannot be accessed. Please verify whether the website exists. | • Confirm the domain name is correct.<br>• Check DNS resolution with `ping` or `nslookup`.<br>• Ensure the site is publicly accessible (not behind VPN/firewall). |
573
573
  | 14 | systemError | Something went wrong when verifying the URL. Please try again in a few minutes. If this issue persists, please contact the Oobee team. | • Retry after a few minutes.<br>• Check internet connection.<br>• If persistent, report as a system issue. |
574
- | 15 | notASitemap | Invalid sitemap URL format. Please enter a valid sitemap URL ending with .XML e.g. https://www.example.com/sitemap.xml. | • Ensure the URL points to a valid XML sitemap.<br>• View [Examples of sitemaps sitemaps.org - Protocol](https://www.sitemaps.org/protocol.html)<br>• Test the URL in a browser to confirm it returns XML. |
574
+ | 15 | notASitemap | Invalid sitemap URL format. Please enter a valid sitemap URL ending with .XML or .TXT e.g. https://www.example.com/sitemap.xml. | • Ensure the URL points to a valid XML or plain-text (.txt) sitemap.<br>• View [sitemaps.org - Sitemap protocol and examples](https://www.sitemaps.org/protocol.html).<br>• Test the URL in a browser to confirm it returns XML or a plain-text list of URLs. |
575
575
  | 16 | unauthorised | Login required. Please enter your credentials and try again. | • Check if the site requires username/password.<br>• Provide credentials in Oobee if supported. |
576
576
  | 17 | browserError | Incompatible browser. Please ensure you are using Chrome or Edge browser. | • Install the latest version of Chrome or Edge.|
577
577
  | 18 | sslProtocolError | SSL certificate error. Please check the SSL configuration of your website and try again. | • Verify SSL certificate validity (not expired, issued by trusted CA).<br>• Check for mismatched TLS versions or cipher issues.<br>• Use an SSL checker tool (e.g., Qualys SSL Labs). |
package/package.json CHANGED
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "name": "@govtechsg/oobee",
3
3
  "main": "dist/npmIndex.js",
4
- "version": "0.10.69",
4
+ "version": "0.10.70",
5
5
  "type": "module",
6
6
  "author": "Government Technology Agency <info@tech.gov.sg>",
7
7
  "dependencies": {
@@ -397,7 +397,7 @@ const urlCheckStatuses = {
397
397
  message: 'Provided URL cannot be accessed. Server responded with code ', // append it with the response code received,
398
398
  },
399
399
  systemError: { code: 14, message: 'Something went wrong when verifying the URL. Please try again in a few minutes. If this issue persists, please contact the Oobee team.'},
400
- notASitemap: { code: 15, message: 'Invalid sitemap URL format. Please enter a valid sitemap URL ending with .XML e.g. https://www.example.com/sitemap.xml.' },
400
+ notASitemap: { code: 15, message: 'Invalid sitemap URL format. Please enter a valid sitemap URL ending with .XML or .TXT e.g. https://www.example.com/sitemap.xml.' },
401
401
  unauthorised: { code: 16, message: 'Login required. Please enter your credentials and try again.' },
402
402
  // browserError means engine could not find a browser to run the scan
403
403
  browserError: {
@@ -198,6 +198,10 @@ export const crawlLocalFile = async ({
198
198
  results.actualUrl = actualUrl;
199
199
 
200
200
  await dataset.pushData(results);
201
+
202
+ // Ensure proper cleanup of browser context before PDF generation
203
+ await browserContext.close().catch(() => {});
204
+
201
205
  } else {
202
206
 
203
207
  const pdfFileName = path.basename(url);
@@ -0,0 +1,77 @@
1
// Protocols the main frame of a guarded page is allowed to end up on.
const ALLOWED_PROTOCOLS = new Set(['http:', 'https:']);

/**
 * Attaches URL guards to every existing and future page of a browser context.
 *
 * Two layers are installed per page:
 *  1. An init script that wraps `window.open` and tries to wrap
 *     `location.assign` / `location.replace` / `location.href` so that only
 *     http(s) targets are passed through to the originals.
 *  2. A `framenavigated` listener that remembers the last http(s) URL of the
 *     main frame and navigates back to it (or to the fallback) whenever the
 *     main frame lands on any other protocol or an unparsable URL.
 *
 * @param context Browser context object exposing `pages()` and `on('page', …)`.
 * @param opts Optional settings. `opts.fallbackUrl` is the URL to return to
 *   when a page has no recorded http(s) URL yet; without it `about:blank` is used.
 */
export function addUrlGuardScript(context, opts = {}) {
  const { fallbackUrl }: { fallbackUrl?: unknown } = opts;
  // Normalise once so the same string is stored, compared and navigated to.
  const fallback = fallbackUrl ? String(fallbackUrl) : undefined;

  const lastAllowedUrlByPage = new WeakMap<object, string>();

  // Sends the page back to its last allowed URL, the fallback, or about:blank.
  const restoreToSafeUrl = async (page, attemptedUrl) => {
    const safeUrl = lastAllowedUrlByPage.get(page) || fallback || 'about:blank';

    // If the page is already on the safe target (e.g. about:blank when no
    // fallback was given), navigating there again would only fire another
    // 'framenavigated' for the same blocked URL and loop forever.
    if (safeUrl === attemptedUrl) return;

    try {
      await page.goto(safeUrl, { waitUntil: 'domcontentloaded' });
    } catch {
      // page might be closing; ignore
    }
  };

  const attachGuardsToPage = (page) => {
    if (!lastAllowedUrlByPage.has(page) && fallback) {
      lastAllowedUrlByPage.set(page, fallback);
    }

    // The callback below is handed to the page, so it must be self-contained:
    // it cannot reference ALLOWED_PROTOCOLS or anything else from this module.
    // The returned promise is observed so that a page closing during setup
    // does not surface as an unhandled rejection.
    void Promise.resolve(
      page.addInitScript(() => {
        const isAllowedProtocol = (value: unknown): boolean => {
          try {
            const s = value instanceof URL ? value.toString() : String(value);
            const protocol = new URL(s, window.location.href).protocol;
            return protocol === 'http:' || protocol === 'https:';
          } catch {
            return false;
          }
        };

        // Applies one override in isolation. Browsers may expose the patched
        // member as read-only / non-configurable (Location members in
        // particular), in which case the override is rejected; that must not
        // abort the remaining patches or raise an error inside the page.
        const attempt = (patch: () => void): void => {
          try {
            patch();
          } catch {
            // override not permitted here; the framenavigated listener still applies
          }
        };

        const win = window;

        const openOriginal = win.open;
        attempt(() => {
          win.open = function (targetUrl, ...args) {
            if (!isAllowedProtocol(targetUrl)) return null;
            return openOriginal.call(this, targetUrl, ...args);
          };
        });

        const assignOriginal = win.location.assign.bind(win.location);
        const replaceOriginal = win.location.replace.bind(win.location);

        attempt(() => {
          win.location.assign = (nextUrl) => { if (isAllowedProtocol(nextUrl)) assignOriginal(nextUrl); };
        });
        attempt(() => {
          win.location.replace = (nextUrl) => { if (isAllowedProtocol(nextUrl)) replaceOriginal(nextUrl); };
        });
        attempt(() => {
          Object.defineProperty(win.location, 'href', {
            get() { return String(win.location.toString()); },
            set(nextUrl) { if (isAllowedProtocol(nextUrl)) assignOriginal(nextUrl); },
          });
        });
      }),
    ).catch(() => {
      // page might be closing; ignore
    });

    page.on('framenavigated', async (frame) => {
      // Only the top-level document is policed; sub-frames are left alone.
      if (frame !== page.mainFrame()) return;

      const urlStr = frame.url();
      let urlObj;
      try {
        urlObj = new URL(urlStr);
      } catch {
        return restoreToSafeUrl(page, urlStr);
      }

      if (ALLOWED_PROTOCOLS.has(urlObj.protocol)) {
        lastAllowedUrlByPage.set(page, urlObj.toString());
        return;
      }
      await restoreToSafeUrl(page, urlStr);
    });
  };

  // Guard existing and future pages
  for (const page of context.pages()) attachGuardsToPage(page);
  context.on('page', attachGuardsToPage);
}

export default addUrlGuardScript;
@@ -10,6 +10,7 @@ import constants, {
10
10
  import { DEBUG, initNewPage, log } from './custom/utils.js';
11
11
  import { guiInfoLog } from '../logs.js';
12
12
  import { ViewportSettingsClass } from '../combine.js';
13
+ import { addUrlGuardScript } from './guards/urlGuard.js';
13
14
 
14
15
  // Export of classes
15
16
 
@@ -84,6 +85,8 @@ const runCustom = async (
84
85
 
85
86
  register(context);
86
87
 
88
+ addUrlGuardScript(context, { fallbackUrl: url });
89
+
87
90
  // Detection of new page
88
91
  context.on('page', async newPage => {
89
92
  await initNewPage(newPage, pageClosePromises, processPageParams, pagesDict);