@govtechsg/oobee 0.10.83 → 0.10.85
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +6 -1
- package/dist/cli.js +7 -6
- package/dist/constants/common.js +13 -1
- package/dist/crawlers/crawlDomain.js +220 -120
- package/dist/crawlers/crawlIntelligentSitemap.js +22 -7
- package/dist/crawlers/custom/utils.js +81 -40
- package/dist/crawlers/runCustom.js +13 -5
- package/dist/mergeAxeResults/itemReferences.js +55 -0
- package/dist/mergeAxeResults/jsonArtifacts.js +335 -0
- package/dist/mergeAxeResults/scanPages.js +159 -0
- package/dist/mergeAxeResults/sentryTelemetry.js +152 -0
- package/dist/mergeAxeResults/types.js +1 -0
- package/dist/mergeAxeResults/writeCsv.js +125 -0
- package/dist/mergeAxeResults/writeScanDetailsCsv.js +35 -0
- package/dist/mergeAxeResults/writeSitemap.js +10 -0
- package/dist/mergeAxeResults.js +64 -950
- package/dist/proxyService.js +90 -5
- package/dist/utils.js +20 -7
- package/package.json +6 -6
- package/src/cli.ts +20 -15
- package/src/constants/common.ts +13 -1
- package/src/crawlers/crawlDomain.ts +248 -137
- package/src/crawlers/crawlIntelligentSitemap.ts +22 -8
- package/src/crawlers/custom/utils.ts +103 -48
- package/src/crawlers/runCustom.ts +18 -5
- package/src/mergeAxeResults/itemReferences.ts +62 -0
- package/src/mergeAxeResults/jsonArtifacts.ts +451 -0
- package/src/mergeAxeResults/scanPages.ts +207 -0
- package/src/mergeAxeResults/sentryTelemetry.ts +183 -0
- package/src/mergeAxeResults/types.ts +99 -0
- package/src/mergeAxeResults/writeCsv.ts +145 -0
- package/src/mergeAxeResults/writeScanDetailsCsv.ts +51 -0
- package/src/mergeAxeResults/writeSitemap.ts +13 -0
- package/src/mergeAxeResults.ts +125 -1344
- package/src/proxyService.ts +96 -4
- package/src/utils.ts +19 -7
package/src/proxyService.ts
CHANGED
|
@@ -29,6 +29,8 @@ export interface ProxyInfo {
|
|
|
29
29
|
// optional global credentials to embed (URL-encoded)
|
|
30
30
|
username?: string;
|
|
31
31
|
password?: string;
|
|
32
|
+
// optional include-only list of domain patterns (from INCLUDE_PROXY env)
|
|
33
|
+
includeList?: string[];
|
|
32
34
|
}
|
|
33
35
|
|
|
34
36
|
export interface ProxySettings {
|
|
@@ -329,7 +331,7 @@ function parseWindowsRegistry(): ProxyInfo | null {
|
|
|
329
331
|
if (enabledManual && v.ProxyServer) {
|
|
330
332
|
const s = v.ProxyServer.trim();
|
|
331
333
|
if (s.includes('=')) {
|
|
332
|
-
for (const part of s.split(
|
|
334
|
+
for (const part of s.split(/[,;]/)) {
|
|
333
335
|
const [proto, addr] = part.split('=');
|
|
334
336
|
if (!proto || !addr) continue;
|
|
335
337
|
const p = proto.trim().toLowerCase();
|
|
@@ -359,14 +361,104 @@ function parseWindowsRegistry(): ProxyInfo | null {
|
|
|
359
361
|
|
|
360
362
|
/**
 * Detect the proxy configuration for the current machine.
 *
 * Environment-variable settings are consulted first on every platform; only
 * when they yield nothing does the lookup fall back to the Windows registry
 * (win32) or `scutil` (darwin). Other platforms rely on the environment only.
 *
 * When the INCLUDE_PROXY environment variable holds at least one non-blank
 * entry, the entries are stored on `includeList` and any bypass list is
 * cleared, because the two mechanisms are treated as mutually exclusive.
 *
 * @returns the detected proxy info, or a falsy value when no proxy was found.
 */
export function getProxyInfo(): ProxyInfo | null {
  let detected: ProxyInfo | null = parseEnvProxyCommon();
  if (!detected) {
    // Platform-specific fallbacks are only queried when the env lookup is empty.
    switch (os.platform()) {
      case 'win32':
        detected = parseWindowsRegistry();
        break;
      case 'darwin':
        detected = parseMacScutil();
        break;
      default:
        // Linux/others: environment variables are the only source.
        break;
    }
  }

  // INCLUDE_PROXY: comma- or semicolon-separated domain globs that SHOULD use the proxy.
  const rawInclude = process.env.INCLUDE_PROXY;
  if (!rawInclude || !detected) {
    return detected;
  }

  const includePatterns = rawInclude
    .split(/[,;]/)
    .map(entry => entry.trim())
    .filter(entry => entry.length > 0);
  if (includePatterns.length === 0) {
    return detected;
  }

  // INCLUDE_PROXY wins over NO_PROXY / bypass list; tell the user what was dropped.
  const envNoProxy = process.env.NO_PROXY || process.env.no_proxy;
  if (envNoProxy || detected.bypassList) {
    console.warn(
      'INCLUDE_PROXY is set — ignoring NO_PROXY / bypass list. ' +
        'These two settings cannot be mixed; INCLUDE_PROXY takes precedence.',
    );
    detected.bypassList = undefined;
  }
  detected.includeList = includePatterns;

  return detected;
}
|
|
392
|
+
|
|
393
|
+
/**
 * Convert a glob-style domain pattern (e.g. `*.example.com`) into an anchored
 * regex source string.
 *
 * Every regex metacharacter other than `*` is escaped so it matches literally;
 * each `*` becomes `.*` (any run of characters, dots included). The result is
 * wrapped in `^...$` so the whole host must match.
 *
 * @param pattern glob-style domain pattern
 * @returns regex source suitable for `new RegExp(...)`
 */
function domainPatternToRegex(pattern: string): string {
  const escaped = pattern
    // `?` must be in this class too: left unescaped it would act as a regex
    // quantifier and silently make the preceding character optional.
    .replace(/[.+?^${}()|[\]\\]/g, '\\$&')
    .replace(/\*/g, '.*'); // glob * → regex .*
  return `^${escaped}$`;
}
|
|
404
|
+
|
|
405
|
+
/**
 * Build a PAC script that routes only hosts matching `includeList` through the
 * given proxy and returns DIRECT for everything else.
 *
 * @param proxyServer proxy URL such as "http://host:port" or "socks5://host:port";
 *   the scheme selects the PAC keyword (SOCKS5, SOCKS or PROXY)
 * @param includeList glob patterns handed to the PAC built-in `shExpMatch`
 * @returns the source text of a `FindProxyForURL` function
 */
function buildIncludeOnlyPac(proxyServer: string, includeList: string[]): string {
  let pacProxy: string;
  if (proxyServer.startsWith('socks5://')) {
    pacProxy = `SOCKS5 ${proxyServer.replace('socks5://', '')}`;
  } else if (proxyServer.startsWith('socks://') || proxyServer.startsWith('socks4://')) {
    pacProxy = `SOCKS ${proxyServer.replace(/^socks4?:\/\//, '')}`;
  } else {
    // http:// or https:// → PROXY host:port
    pacProxy = `PROXY ${proxyServer.replace(/^https?:\/\//, '')}`;
  }

  const lines = ['function FindProxyForURL(url, host) {'];

  // With no patterns there is nothing to proxy. Emitting `if () {` would make
  // the whole PAC script a syntax error, so skip the conditional entirely.
  if (includeList.length > 0) {
    // JSON.stringify yields a safely quoted JS string literal for each pattern.
    const conditions = includeList
      .map(pattern => `shExpMatch(host, ${JSON.stringify(pattern)})`)
      .join(' || ');
    lines.push(
      `  if (${conditions}) {`,
      `    return ${JSON.stringify(pacProxy)};`,
      '  }',
    );
  }

  lines.push('  return "DIRECT";', '}');
  return lines.join('\n');
}
|
|
366
437
|
|
|
367
438
|
export function proxyInfoToResolution(info: ProxyInfo | null): ProxyResolution {
|
|
368
439
|
if (!info) return { kind: 'none' };
|
|
369
440
|
|
|
441
|
+
// If INCLUDE_PROXY is set, generate a PAC that only proxies the listed domains
|
|
442
|
+
if (info.includeList && info.includeList.length > 0) {
|
|
443
|
+
// Determine the proxy server string
|
|
444
|
+
let proxyServer: string | undefined;
|
|
445
|
+
if (info.http) proxyServer = `http://${info.http}`;
|
|
446
|
+
else if (info.https) proxyServer = `http://${info.https}`;
|
|
447
|
+
else if (info.socks) proxyServer = `socks5://${info.socks}`;
|
|
448
|
+
|
|
449
|
+
if (proxyServer) {
|
|
450
|
+
// If credentials exist, embed them for the manual proxy auth
|
|
451
|
+
// PAC scripts themselves don't carry auth, but Playwright's proxy option can
|
|
452
|
+
// We use PAC for routing, and if creds are needed, Playwright will prompt or
|
|
453
|
+
// we set them via the proxy option. Unfortunately Playwright doesn't allow
|
|
454
|
+
// proxy.username with a PAC, so we embed in the launch arg and rely on
|
|
455
|
+
// Chromium's built-in auth handler for proxy auth challenges.
|
|
456
|
+
const pac = buildIncludeOnlyPac(proxyServer, info.includeList);
|
|
457
|
+
const pacDataUrl = `data:application/x-ns-proxy-autoconfig;base64,${Buffer.from(pac).toString('base64')}`;
|
|
458
|
+
return { kind: 'pac', pacUrl: pacDataUrl, bypass: info.bypassList };
|
|
459
|
+
}
|
|
460
|
+
}
|
|
461
|
+
|
|
370
462
|
// Prefer manual proxies first (these work with Playwright's proxy option)
|
|
371
463
|
if (info.http) {
|
|
372
464
|
return { kind: 'manual', settings: {
|
package/src/utils.ts
CHANGED
|
@@ -4,6 +4,7 @@ import os from 'os';
|
|
|
4
4
|
import fs from 'fs-extra';
|
|
5
5
|
import axe, { Rule } from 'axe-core';
|
|
6
6
|
import { v4 as uuidv4 } from 'uuid';
|
|
7
|
+
import { getDomain } from 'tldts';
|
|
7
8
|
import constants, {
|
|
8
9
|
BrowserTypes,
|
|
9
10
|
destinationPath,
|
|
@@ -1078,14 +1079,25 @@ export const randomThreeDigitNumberString = () => {
|
|
|
1078
1079
|
};
|
|
1079
1080
|
|
|
1080
1081
|
/**
 * Decide whether `link2` may be followed from `link1` under the given strategy.
 *
 * Supported rules:
 * - 'all'         → always follow
 * - 'same-origin' → URL origins must be equal
 * - 'same-domain' → registrable domains (via tldts `getDomain`, private
 *                   suffixes allowed) must be equal, falling back to the
 *                   hostname when no domain can be derived
 * - anything else → hostnames must be equal
 *
 * Any failure while parsing or comparing (e.g. an invalid URL) yields `false`,
 * including for the 'all' rule, because both links are parsed first.
 */
export const isFollowStrategy = (link1: string, link2: string, rule: string): boolean => {
  try {
    const from = new URL(link1);
    const to = new URL(link2);

    switch (rule) {
      case 'all':
        return true;
      case 'same-origin':
        return from.origin === to.origin;
      case 'same-domain': {
        const registrableDomain = (hostname: string): string =>
          (getDomain(hostname, { allowPrivateDomains: true }) || hostname).toLowerCase();
        return registrableDomain(from.hostname) === registrableDomain(to.hostname);
      }
      default:
        // default: same-hostname
        return from.hostname === to.hostname;
    }
  } catch {
    return false;
  }
};
|
|
1090
1102
|
|
|
1091
1103
|
export const retryFunction = async <T>(func: () => Promise<T>, maxAttempt: number): Promise<T> => {
|