@govtechsg/oobee 0.10.58 → 0.10.62
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/DETAILS.md +1 -1
- package/README.md +1 -0
- package/package.json +3 -2
- package/src/cli.ts +46 -99
- package/src/combine.ts +18 -6
- package/src/constants/cliFunctions.ts +5 -4
- package/src/constants/common.ts +207 -295
- package/src/constants/constants.ts +65 -32
- package/src/constants/questions.ts +11 -5
- package/src/crawlers/commonCrawlerFunc.ts +11 -5
- package/src/crawlers/crawlDomain.ts +34 -86
- package/src/crawlers/crawlIntelligentSitemap.ts +18 -11
- package/src/crawlers/crawlLocalFile.ts +9 -17
- package/src/crawlers/crawlSitemap.ts +30 -96
- package/src/crawlers/custom/utils.ts +5 -5
- package/src/crawlers/pdfScanFunc.ts +3 -2
- package/src/crawlers/runCustom.ts +4 -3
- package/src/index.ts +8 -9
- package/src/logs.ts +36 -11
- package/src/mergeAxeResults.ts +37 -31
- package/src/npmIndex.ts +4 -4
- package/src/screenshotFunc/htmlScreenshotFunc.ts +4 -4
- package/src/static/ejs/partials/scripts/utils.ejs +8 -11
- package/src/utils.ts +304 -15
@@ -12,17 +12,14 @@
|
|
12
12
|
/**
 * Extracts the tag name and the attribute names from an HTML snippet.
 * e.g. ["input", "type", "value", "role"] from <input type="text" value="..." role="..." />
 *
 * - The tag name is taken from the first tag in the string; leading whitespace
 *   is tolerated and hyphenated (custom element) names are kept whole.
 * - Attribute names are collected from every `name="value"` or `name='value'`
 *   pair found anywhere in the string, in order of appearance.
 *
 * @param htmlString HTML snippet that starts with an opening tag
 * @returns [tagName, ...attributeNames], or [] when the input is not a string
 *          or does not start with a tag
 */
const getHtmlTagAndAttributes = (htmlString) => {
  if (typeof htmlString !== 'string') return [];

  // Tag name: word character followed by word characters or hyphens (e.g. "my-widget")
  const tagMatch = htmlString.match(/^\s*<\s*(\w[\w-]*)/);
  if (!tagMatch) return [];

  const tagName = tagMatch[1];

  // Accept both quoting styles; the quoted value is consumed as a whole so
  // attribute-like text inside a value is not reported as an attribute.
  const attrMatches = [...htmlString.matchAll(/(\w[\w-]*)\s*=\s*(?:"[^"]*"|'[^']*')/g)];
  const attributes = attrMatches.map(match => match[1]);

  return [tagName, ...attributes];
};
|
27
24
|
|
28
25
|
const rulesUsingRoles = [
|
package/src/utils.ts
CHANGED
@@ -9,8 +9,10 @@ import constants, {
|
|
9
9
|
destinationPath,
|
10
10
|
getIntermediateScreenshotsPath,
|
11
11
|
} from './constants/constants.js';
|
12
|
-
import { consoleLogger, silentLogger } from './logs.js';
|
12
|
+
import { consoleLogger, errorsTxtPath, silentLogger } from './logs.js';
|
13
13
|
import { getAxeConfiguration } from './crawlers/custom/getAxeConfiguration.js';
|
14
|
+
import { constant } from 'lodash';
|
15
|
+
import { errors } from 'playwright';
|
14
16
|
|
15
17
|
export const getVersion = () => {
|
16
18
|
const loadJSON = (filePath: string): { version: string } =>
|
@@ -33,21 +35,57 @@ export const isWhitelistedContentType = (contentType: string): boolean => {
|
|
33
35
|
};
|
34
36
|
|
35
37
|
/**
 * Resolves (and creates, if needed) the directory that holds the results of a scan.
 *
 * Resolution order:
 *  1. `constants.exportDirectory`, when already set, is returned as-is.
 *  2. `<cwd>/results/<randomToken>`, when it can be created and written to.
 *  3. A per-user fallback: `<home>/Documents/Oobee/<randomToken>` on Windows and
 *     macOS, `<home>/Oobee/<randomToken>` elsewhere.
 *
 * The chosen path is stored in `constants.exportDirectory`, so later calls
 * return the same directory.
 *
 * @param randomToken Identifier of the scan, used as the last path segment
 * @returns Path of the storage directory
 */
export const getStoragePath = (randomToken: string): string => {
  // If exportDirectory is set, use it
  if (constants.exportDirectory) {
    return constants.exportDirectory;
  }

  // Otherwise, use the current working directory
  let storagePath = path.join(process.cwd(), 'results', randomToken);

  // Probe the cwd location: create it when missing, then check write access
  const isWritable = (() => {
    try {
      if (!fs.existsSync(storagePath)) {
        fs.mkdirSync(storagePath, { recursive: true });
      }
      fs.accessSync(storagePath, fs.constants.W_OK);
      return true;
    } catch {
      return false;
    }
  })();

  if (!isWritable) {
    // Last-resort home lookup. Without it an unset HOME/USERPROFILE would give
    // an empty base, i.e. a path relative to the cwd we just failed to write to.
    const lookupHome = (): string => {
      try {
        return os.homedir();
      } catch {
        return '';
      }
    };

    const platform = os.platform();
    if (platform === 'win32') {
      // Use Documents folder on Windows
      const home = process.env.USERPROFILE || process.env.HOMEPATH || lookupHome();
      storagePath = path.join(home, 'Documents', 'Oobee', randomToken);
    } else if (platform === 'darwin') {
      // Use Documents folder on Mac
      const home = process.env.HOME || lookupHome();
      storagePath = path.join(home, 'Documents', 'Oobee', randomToken);
    } else {
      // Use home directory for Linux/other
      const home = process.env.HOME || lookupHome();
      storagePath = path.join(home, 'Oobee', randomToken);
    }
    consoleLogger.warn(`Warning: Cannot write to cwd, writing to ${storagePath}`);
  }

  if (!fs.existsSync(storagePath)) {
    fs.mkdirSync(storagePath, { recursive: true });
  }

  // Remember the decision so every later call resolves to the same directory
  constants.exportDirectory = storagePath;
  return storagePath;
};
|
47
85
|
|
48
86
|
export const createDetailsAndLogs = async (randomToken: string): Promise<void> => {
|
49
87
|
const storagePath = getStoragePath(randomToken);
|
50
|
-
const logPath =
|
88
|
+
// Logs live in the "logs" sub-directory of the storage path
// (a stray "}" in the template previously produced "<storagePath>}/logs").
const logPath = `${storagePath}/logs`;
|
51
89
|
try {
|
52
90
|
await fs.ensureDir(storagePath);
|
53
91
|
|
@@ -193,8 +231,257 @@ export const createScreenshotsFolder = (randomToken: string): void => {
|
|
193
231
|
}
|
194
232
|
};
|
195
233
|
|
196
|
-
|
197
|
-
|
234
|
+
|
235
|
+
// Set to true when cleanUpAndExit starts, so that later exit requests are ignored.
let __shuttingDown = false;
|
236
|
+
// Promise of the stopAll run currently in flight, or null when none is running;
// stopAll hands this promise to overlapping callers instead of starting a second run.
let __stopAllLock: Promise<void> | null = null;
|
237
|
+
|
238
|
+
/**
 * Tracks a resource in `constants.resources` so that it can be stopped later.
 *
 * The resource is classified by its constructor name: "BrowserContext",
 * "Browser", or any name ending in "Crawler". Anything else is not tracked.
 *
 * @param resource Object to track
 * @returns The same object, so the call can wrap the expression that creates it
 */
export function register(resource: any) {
  const ctorName = resource?.constructor?.name;

  switch (ctorName) {
    case 'BrowserContext':
      constants.resources.browserContexts.add(resource);
      break;
    case 'Browser':
      constants.resources.browsers.add(resource);
      break;
    default:
      if (ctorName?.endsWith('Crawler')) {
        constants.resources.crawlers.add(resource);
      }
  }

  return resource;
}
|
255
|
+
|
256
|
+
/**
 * Stops or tears down all resources tracked in `constants.resources`:
 * crawlers first, then browser contexts, then browsers.
 *
 * A call made while another run is in flight does not start a second pass; it
 * receives the promise of the run already in progress, whatever `mode` it asked for.
 *
 * @param mode "graceful" (finish in-flight), "abort" (drop in-flight), or "teardown" (close immediately)
 * @param timeoutMs Max time to wait for each individual stop/close call before moving on
 */
export async function stopAll({ mode = 'graceful', timeoutMs = 10_000 } = {}) {
  if (__stopAllLock) return __stopAllLock; // prevent overlap
  __stopAllLock = (async () => {
    // Waits for `work` for at most `ms`. The timer is always cleared afterwards so
    // that a finished race does not leave a pending timer keeping the process alive.
    const settleWithin = async (work: unknown, ms: number): Promise<void> => {
      let timer: ReturnType<typeof setTimeout> | undefined;
      try {
        await Promise.race([
          work,
          new Promise<void>(resolve => {
            timer = setTimeout(resolve, ms);
          }),
        ]);
      } finally {
        if (timer !== undefined) clearTimeout(timer);
      }
    };

    consoleLogger.info(`Stop browsers starting, mode=${mode}, timeoutMs=${timeoutMs}`);

    // --- Crawlers ---
    // Iterate over a copy: entries are removed from the set inside the loop.
    for (const c of [...constants.resources.crawlers]) {
      try {
        const pool = (c as any).autoscaledPool;
        if (pool && typeof pool.isRunning !== 'undefined' && !pool.isRunning) {
          consoleLogger.info('Skipping crawler (already stopped)');
          continue;
        }

        consoleLogger.info(`Closing crawler (${mode})...`);
        if (mode === 'graceful') {
          if (typeof c.stop === 'function') {
            await settleWithin(c.stop(), timeoutMs);
          }
        } else if (mode === 'abort') {
          // Await the abort so that a failure is reported by the catch below
          // and "closed" is only logged once the pool has actually been aborted.
          await settleWithin(pool?.abort?.(), timeoutMs);
        } else {
          if (typeof c.teardown === 'function') {
            await settleWithin(c.teardown(), timeoutMs);
          }
        }
        consoleLogger.info(`Crawler closed (${mode})`);
      } catch (err) {
        consoleLogger.warn(`Error stopping crawler: ${(err as Error).message}`);
      } finally {
        // Runs on the `continue` path as well: the crawler is always untracked.
        constants.resources.crawlers.delete(c);
      }
    }

    // --- BrowserContexts ---
    for (const ctx of [...constants.resources.browserContexts]) {
      // compute once so we can also use in finally
      const pagesArr = typeof ctx.pages === 'function' ? ctx.pages() : [];
      const hasOpenPages = Array.isArray(pagesArr) && pagesArr.length > 0;

      try {
        const browser = typeof ctx.browser === 'function' ? ctx.browser() : null;
        if (browser && (browser as any).isClosed?.()) {
          consoleLogger.info('Skipping BrowserContext (browser already closed)');
          continue;
        }

        // Graceful: don't kill contexts that are still doing work
        if (mode === 'graceful' && hasOpenPages) {
          consoleLogger.info(`Skipping BrowserContext in graceful (has ${pagesArr.length} open page(s))`);
          continue; // leave it for the teardown pass
        }

        // (Optional speed-up) close pages first if any
        if (hasOpenPages) {
          consoleLogger.info(`Closing ${pagesArr.length} page(s) before context close...`);
          for (const p of pagesArr) {
            try {
              await settleWithin(p.close(), 1500);
            } catch {
              // best effort: the context close below covers pages that failed to close
            }
          }
        }

        consoleLogger.info('Closing BrowserContext...');
        if (typeof ctx.close === 'function') {
          await settleWithin(ctx.close(), timeoutMs);
        }
        consoleLogger.info('BrowserContext closed');

        // also close its browser (persistent contexts)
        const b = browser;
        if (b && !(b as any).isClosed?.()) {
          consoleLogger.info('Closing Browser (from context.browser())...');
          if (typeof b.close === 'function') {
            await settleWithin(b.close(), timeoutMs);
          }
          consoleLogger.info('Browser closed (from context.browser())');
        }
      } catch (err) {
        consoleLogger.warn(`Error closing BrowserContext: ${(err as Error).message}`);
      } finally {
        // Keep the context tracked when it had open pages during a graceful
        // pass, so that a later pass in another mode can still close it.
        if (!(mode === 'graceful' && hasOpenPages)) {
          constants.resources.browserContexts.delete(ctx);
        }
      }
    }

    // --- Browsers ---
    for (const b of [...constants.resources.browsers]) {
      try {
        if ((b as any).isClosed?.()) {
          consoleLogger.info('Skipping Browser (already closed)');
          continue;
        }

        consoleLogger.info('Closing Browser...');
        if (typeof b.close === 'function') {
          await settleWithin(b.close(), timeoutMs);
        }
        consoleLogger.info('Browser closed');
      } catch (err) {
        consoleLogger.warn(`Error closing Browser: ${(err as Error).message}`);
      } finally {
        constants.resources.browsers.delete(b);
      }
    }

    consoleLogger.info(`Stop browsers finished for mode=${mode}`);
  })();

  try {
    await __stopAllLock;
  } finally {
    // Release the lock even when the run failed, so the next call can start a new one.
    __stopAllLock = null;
  }
}
|
377
|
+
|
378
|
+
/**
 * Removes temporary artefacts of a scan and, after an error, preserves the error log.
 *
 * Steps:
 *  1. When `isError` is true, stops all tracked resources (a graceful pass, then a teardown pass).
 *  2. Removes `constants.userDataDirectory`, if set.
 *  3. When a token is known: removes the `crawlee` folder inside the storage path;
 *     on error, copies the errors file to `logs-<token>.txt` (in `OOBEE_LOGS_PATH`
 *     if set, otherwise in the storage path) and deletes the original once the copy
 *     succeeded; finally removes the storage path if it is left empty.
 *
 * Every file-system step is best effort: failures are logged and do not throw.
 *
 * @param randomToken Scan identifier; falls back to `constants.randomToken` when omitted
 * @param isError Whether the clean-up follows an error or an interrupt
 */
export const cleanUp = async (randomToken?: string, isError: boolean = false): Promise<void> => {
  // Caught values are not guaranteed to be Error instances.
  const describeError = (caught: unknown): string =>
    caught instanceof Error ? caught.message : String(caught);

  if (isError) {
    await stopAll({ mode: 'graceful', timeoutMs: 8000 });
    await stopAll({ mode: 'teardown', timeoutMs: 4000 });
  }

  // Resolve the token into a local instead of reassigning the parameter.
  const token =
    randomToken === undefined && constants.randomToken ? constants.randomToken : randomToken;

  if (constants.userDataDirectory) {
    try {
      fs.rmSync(constants.userDataDirectory, { recursive: true, force: true });
    } catch (error) {
      consoleLogger.warn(`Unable to force remove userDataDirectory: ${describeError(error)}`);
    }
  }

  if (token === undefined) {
    return;
  }

  const storagePath = getStoragePath(token);

  try {
    fs.rmSync(path.join(storagePath, 'crawlee'), { recursive: true, force: true });
  } catch (error) {
    consoleLogger.warn(`Unable to force remove crawlee folder: ${describeError(error)}`);
  }

  if (isError && fs.existsSync(errorsTxtPath)) {
    const logsPath = process.env.OOBEE_LOGS_PATH ? process.env.OOBEE_LOGS_PATH : storagePath;
    let deleteErrorLogFile = true;

    try {
      const logFilePath = path.join(logsPath, `logs-${token}.txt`);
      // The target directory (notably a custom OOBEE_LOGS_PATH) may not exist yet.
      fs.mkdirSync(logsPath, { recursive: true });
      fs.copyFileSync(errorsTxtPath, logFilePath);
      console.log(`An error occured. Log file is located at: ${logFilePath}`);
    } catch (copyError) {
      consoleLogger.error(`Error copying errors file during cleanup: ${describeError(copyError)}`);
      console.log(`An error occured. Log file is located at: ${errorsTxtPath}`);
      deleteErrorLogFile = false; // Do not delete the log file if copy failed
    }

    if (deleteErrorLogFile && fs.existsSync(errorsTxtPath)) {
      try {
        fs.unlinkSync(errorsTxtPath);
      } catch (error) {
        consoleLogger.warn(`Unable to delete log file ${errorsTxtPath}: ${describeError(error)}`);
      }
    }
  }

  // Remove the storage directory only when nothing is left inside it.
  if (fs.existsSync(storagePath) && fs.readdirSync(storagePath).length === 0) {
    try {
      fs.rmdirSync(storagePath);
      consoleLogger.info(`Deleted empty storage path: ${storagePath}`);
    } catch (error) {
      consoleLogger.warn(`Error deleting empty storage path ${storagePath}: ${describeError(error)}`);
    }
  }

  consoleLogger.info(`Clean up completed for: ${token}`);
};
|
451
|
+
|
452
|
+
/**
 * Runs cleanUp once and then terminates the process with the given exit code.
 *
 * Only the first call does any work. A call made after shutdown has started
 * logs a notice and returns without cleaning up or exiting.
 *
 * @param exitCode Code passed to process.exit
 * @param randomToken Scan identifier forwarded to cleanUp
 * @param isError Forwarded to cleanUp
 */
export const cleanUpAndExit = async (
  exitCode: number,
  randomToken?: string,
  isError: boolean = false,
): Promise<void> => {
  if (!__shuttingDown) {
    __shuttingDown = true;

    try {
      // cleanUp calls stopAll itself when isError is true
      await cleanUp(randomToken, isError);
    } catch (cleanupFailure: any) {
      consoleLogger.warn(`Cleanup error: ${cleanupFailure?.message || cleanupFailure}`);
    }

    consoleLogger.info(`Exiting with code: ${exitCode}`);
    // Exit explicitly, after the clean-up attempt has finished
    process.exit(exitCode);
  } else {
    consoleLogger.info('Cleanup already in progress; ignoring duplicate exit request.');
  }
};
|
472
|
+
|
473
|
+
/**
 * Logs the process id and installs SIGINT and SIGTERM handlers that clean up
 * and exit, with code 130 for SIGINT and 143 for SIGTERM.
 *
 * @param randomToken Scan identifier handed to cleanUpAndExit by both handlers
 */
export const listenForCleanUp = (randomToken: string): void => {
  consoleLogger.info(`PID: ${process.pid}`);

  // Builds a signal handler that logs `notice`, then cleans up and exits with `code`.
  const exitOnSignal = (notice: string, code: number) => async () => {
    consoleLogger.info(notice);
    await cleanUpAndExit(code, randomToken, true);
  };

  // The handlers stay installed for the lifetime of the process.
  process.on('SIGINT', exitOnSignal('SIGINT received. Cleaning up and exiting.', 130));
  process.on('SIGTERM', exitOnSignal('SIGTERM received. Cleaning up and exiting.', 143));
};
|
199
486
|
|
200
487
|
export const getWcagPassPercentage = (
|
@@ -711,16 +998,18 @@ export const zipResults = (zipName: string, resultsPath: string): void => {
|
|
711
998
|
fs.unlinkSync(zipName);
|
712
999
|
}
|
713
1000
|
|
1001
|
+
// Check if user specified absolute or relative path
|
1002
|
+
const zipFilePath = path.isAbsolute(zipName) ? zipName : path.join(process.cwd(), zipName);
|
1003
|
+
|
1004
|
+
|
714
1005
|
if (os.platform() === 'win32') {
|
715
1006
|
execSync(
|
716
|
-
`
|
717
|
-
{
|
1007
|
+
`tar.exe -a -c -f "${zipFilePath}" *`,
|
1008
|
+
{ cwd: resultsPath },
|
718
1009
|
);
|
719
1010
|
} else {
|
720
1011
|
// Get zip command in Mac and Linux
|
721
1012
|
const command = '/usr/bin/zip';
|
722
|
-
// Check if user specified absolute or relative path
|
723
|
-
const zipFilePath = path.isAbsolute(zipName) ? zipName : path.join(process.cwd(), zipName);
|
724
1013
|
|
725
1014
|
// To zip up files recursively (-r) in the results folder path and write it to user's specified path
|
726
1015
|
const args = ['-r', zipFilePath, '.'];
|