@uxf/scripts 11.62.1 → 11.62.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json
CHANGED
|
@@ -43,6 +43,11 @@ Environment variables:
|
|
|
43
43
|
type: "boolean",
|
|
44
44
|
group: "Options",
|
|
45
45
|
})
|
|
46
|
+
.option("check-missing", {
|
|
47
|
+
describe: "If indexable pages missing in sitemap should be reported. (--with-nested must be used)",
|
|
48
|
+
type: "boolean",
|
|
49
|
+
group: "Options",
|
|
50
|
+
})
|
|
46
51
|
.option("google-chat-webhook-url", {
|
|
47
52
|
describe: "Webhook URL of Google Chat where to send the results.",
|
|
48
53
|
type: "string",
|
|
@@ -70,6 +75,7 @@ Environment variables:
|
|
|
70
75
|
skip,
|
|
71
76
|
options["with-nested"],
|
|
72
77
|
options["with-images"],
|
|
78
|
+
options["check-missing"],
|
|
73
79
|
options["google-chat-webhook-url"],
|
|
74
80
|
);
|
|
75
81
|
} catch (e) {
|
|
@@ -9,15 +9,13 @@ const { HTTP_USERNAME, HTTP_PASSWORD } = process.env;
|
|
|
9
9
|
|
|
10
10
|
/**
|
|
11
11
|
* @typedef {{parentUrl: (string | undefined), isImg: boolean, isWebPage: boolean, ttl: number, url: string, status: number, message: (string | undefined), skipped: boolean, html: (string | null), redirected: boolean}} UrlCheckResponse
|
|
12
|
-
*/
|
|
13
|
-
|
|
14
|
-
/**
|
|
15
12
|
* @typedef {{url: string, parentUrl: (string | undefined), canonicalUrl: (string | null), isImg: boolean, isWebPage: boolean, ttl: number, status: number, message: (string | undefined), skipped: boolean, indexable: boolean, redirected: boolean}} TestedUrlDto
|
|
13
|
+
* @typedef {{errors: TestedUrlDto[], duplicates: unknown[], missingInSitemap: (*|*[]), ok: TestedUrlDto[], skippedUrls: TestedUrlDto[], errorsSum: number}} ResultDto
|
|
16
14
|
*/
|
|
17
15
|
|
|
18
16
|
const DUPLICATES_TITLE = "\n\n\nDuplicated pages in sitemap:\n";
|
|
19
17
|
const MISSING_TITLE = "\n\n\nMissing pages in sitemap:\n";
|
|
20
|
-
const ERROR_TITLE = "\n\n\nErrors:\n"
|
|
18
|
+
const ERROR_TITLE = "\n\n\nErrors:\n";
|
|
21
19
|
|
|
22
20
|
const MAX_TTL = 3;
|
|
23
21
|
const IMAGES_LABEL = "🏞 Images:";
|
|
@@ -28,10 +26,27 @@ const URLS_LABEL = "🔗 Links:";
|
|
|
28
26
|
*/
|
|
29
27
|
const TESTED_URLS = [];
|
|
30
28
|
const URLS_TO_CHECK = new Set();
|
|
31
|
-
const ERRORS = [];
|
|
32
29
|
|
|
33
30
|
const robotsParser = robotsTxtParser({ userAgent: "uxf-bot", allowOnNeutral: false });
|
|
34
31
|
|
|
32
|
+
// Hostnames whose robots.txt rules should be looked up under a different hostname.
const HOSTNAME_ROBOTS_MAP = {
    "fb.me": "facebook.com",
};

/**
 * Returns the origin (scheme + host + port) of the given URL. When the URL's
 * hostname has an entry in HOSTNAME_ROBOTS_MAP, the mapped hostname is used instead.
 *
 * @param url {string} absolute URL
 * @returns {string}
 * @throws {TypeError} when `url` cannot be parsed as an absolute URL
 */
function getUrlOrigin(url) {
    const urlObject = new URL(url);
    const { hostname } = urlObject;

    // Own-property check: the `in` operator would also match inherited keys such as
    // "constructor" or "__proto__", which are syntactically valid hostnames.
    if (Object.prototype.hasOwnProperty.call(HOSTNAME_ROBOTS_MAP, hostname)) {
        // Swap the hostname on the parsed URL rather than in the raw string, so only the
        // host component changes (never userinfo/path) and the swap still applies when the
        // raw URL spells the host in a different letter case.
        urlObject.hostname = HOSTNAME_ROBOTS_MAP[hostname];
    }

    return urlObject.origin;
}
|
|
49
|
+
|
|
35
50
|
/**
|
|
36
51
|
* @param url {string}
|
|
37
52
|
* @param options {{redirect: boolean, isExternal: boolean}}
|
|
@@ -116,13 +131,6 @@ function createErrorResult(errors) {
|
|
|
116
131
|
}
|
|
117
132
|
}
|
|
118
133
|
|
|
119
|
-
if (ERRORS.length > 0) {
|
|
120
|
-
generalErrors = `\n\nGeneral errors:\n`;
|
|
121
|
-
for (const error of ERRORS) {
|
|
122
|
-
generalErrors += `${createTabSpace(1)}${error}\n`;
|
|
123
|
-
}
|
|
124
|
-
}
|
|
125
|
-
|
|
126
134
|
return parentPages + nestedPages + generalErrors;
|
|
127
135
|
}
|
|
128
136
|
|
|
@@ -194,7 +202,7 @@ async function fetchUrl(url, webUrl, parentUrl = undefined, ttl = 1) {
|
|
|
194
202
|
}
|
|
195
203
|
|
|
196
204
|
try {
|
|
197
|
-
const origin =
|
|
205
|
+
const origin = getUrlOrigin(url);
|
|
198
206
|
|
|
199
207
|
if (parentUrl && origin !== webUrl) {
|
|
200
208
|
await robotsParser.useRobotsFor(origin);
|
|
@@ -344,17 +352,13 @@ async function testSitemapUrls(urls, webUrl, sitemapUrl, skip, withNested, withI
|
|
|
344
352
|
* @return {Promise<void>}
|
|
345
353
|
*/
|
|
346
354
|
async function testNestedUrls(html, parentUrl, parentIndex, webUrl) {
    // Parse the page markup and collect the raw href of every anchor that has one.
    const $ = cheerio.load(html.toString());
    const hrefs = $("a[href]").map((i, node) => $(node).attr("href"));

    // Hand the raw hrefs to createCorrectLinks together with the site origin.
    const links = createCorrectLinks(hrefs, webUrl);
    const label = createTabSpace() + URLS_LABEL;

    await testNested(links, parentIndex, parentUrl, label, webUrl);
}
|
|
359
363
|
|
|
360
364
|
/**
|
|
@@ -365,17 +369,13 @@ async function testNestedUrls(html, parentUrl, parentIndex, webUrl) {
|
|
|
365
369
|
* @return {Promise<void>}
|
|
366
370
|
*/
|
|
367
371
|
async function testNestedImages(html, parentUrl, parentIndex, webUrl) {
    // Parse the page markup and collect the raw src of every image that has one.
    const $ = cheerio.load(html);
    const sources = $("img[src]").map((i, node) => $(node).attr("src"));

    // Hand the raw sources to createCorrectLinks together with the site origin.
    const imageLinks = createCorrectLinks(sources, webUrl);
    const label = createTabSpace() + IMAGES_LABEL;

    await testNested(imageLinks, parentIndex, parentUrl, label, webUrl);
}
|
|
380
380
|
|
|
381
381
|
/**
|
|
@@ -436,7 +436,7 @@ function convertTime(millis) {
|
|
|
436
436
|
}
|
|
437
437
|
|
|
438
438
|
/**
|
|
439
|
-
* @param okResults {
|
|
439
|
+
* @param okResults {TestedUrlDto[]}
|
|
440
440
|
* @param time {number}
|
|
441
441
|
*/
|
|
442
442
|
function logStatistics(okResults, time) {
|
|
@@ -484,6 +484,7 @@ function getPagesShouldBeInSitemap(webUrl) {
|
|
|
484
484
|
(dto) =>
|
|
485
485
|
(dto.canonicalUrl ?? dto.url).startsWith(webUrl) &&
|
|
486
486
|
!(dto.canonicalUrl ?? dto.url).includes("?") &&
|
|
487
|
+
!(dto.canonicalUrl ?? dto.url).includes("#") &&
|
|
487
488
|
dto.status === 200 &&
|
|
488
489
|
dto.ttl <= 1 &&
|
|
489
490
|
!dto.redirected &&
|
|
@@ -492,15 +493,91 @@ function getPagesShouldBeInSitemap(webUrl) {
|
|
|
492
493
|
.map((url) => (url.canonicalUrl ?? url.url).toLowerCase());
|
|
493
494
|
}
|
|
494
495
|
|
|
496
|
+
/**
 * Prints the run header to stdout: the sitemap URL, the web origin and a line for each
 * enabled mode. Terminates the process with exit code 1 when `checkMissing` is set
 * but `shouldReportMissing` is not.
 *
 * @param sitemapUrl {string}
 * @param webUrl {string}
 * @param withNested {boolean}
 * @param withImages {boolean}
 * @param checkMissing {boolean}
 * @param shouldReportMissing {boolean}
 * @return {void}
 */
function logInitialInfo(sitemapUrl, webUrl, withNested, withImages, checkMissing, shouldReportMissing) {
    // Every header line is prefixed with the default tab space.
    const writeIndented = (text) => stdout.write(`${createTabSpace()}${text}`);

    writeIndented(`Sitemap url: ${sitemapUrl}\n`);
    writeIndented(`Web url: ${webUrl}\n\n`);

    if (withNested) {
        writeIndented("Will test nested links\n");
    }
    if (withImages) {
        writeIndented("Will test images\n\n");
    }

    if (!checkMissing) {
        return;
    }

    if (!shouldReportMissing) {
        writeIndented("--check-missing option is only available with --with-nested option!\n");
        process.exit(1);
    }
    writeIndented("Will look for pages missing in sitemap\n\n");
}
|
|
522
|
+
|
|
523
|
+
/**
 * Logs every non-empty problem section (duplicates, missing pages, errors) through
 * logErrors and returns the same sections concatenated into one chat message.
 *
 * @param webUrl {string}
 * @param result {ResultDto}
 * @returns {string} message starting with "Result for <webUrl>:" followed by the non-empty sections
 */
function logResultErrors(webUrl, result) {
    const indentEach = (urls) => urls.map((url) => `${createTabSpace()}${url}`).join("\n");

    // All section texts are built up front, before anything is logged.
    const sections = [
        [DUPLICATES_TITLE, indentEach(result.duplicates)],
        [MISSING_TITLE, indentEach(result.missingInSitemap)],
        [ERROR_TITLE, createErrorResult(result.errors)],
    ];

    let chatMessage = `Result for ${webUrl}:\n\n`;
    for (const [title, text] of sections) {
        // Empty sections are neither logged nor added to the chat message.
        if (!text) {
            continue;
        }
        logErrors(text, title);
        chatMessage += title + text;
    }

    return chatMessage;
}
|
|
549
|
+
|
|
550
|
+
/**
 * Aggregates the outcome of a finished run from the module-level TESTED_URLS list
 * and the URLs read from the sitemap.
 *
 * @param webUrl {string}
 * @param sitemapUrls {string[]}
 * @param shouldReportMissing {boolean} when false, `missingInSitemap` is always empty
 * @returns {ResultDto} `errorsSum` counts missing pages, duplicates and failed URLs
 */
function getResult(webUrl, sitemapUrls, shouldReportMissing) {
    const errors = TESTED_URLS.filter((r) => r.status !== 200 && r.skipped === false);
    const ok = TESTED_URLS.filter((r) => r.status === 200);
    const skippedUrls = TESTED_URLS.filter((r) => r.status !== 200 && r.skipped === true);

    // Single pass: a URL is a duplicate the second time it is seen. Each duplicate is
    // reported once, in the order its first repetition appears in the sitemap.
    const seenUrls = new Set();
    const repeatedUrls = new Set();
    for (const sitemapUrl of sitemapUrls) {
        if (seenUrls.has(sitemapUrl)) {
            repeatedUrls.add(sitemapUrl);
        } else {
            seenUrls.add(sitemapUrl);
        }
    }
    const duplicates = [...repeatedUrls];

    let missingInSitemap = [];
    if (shouldReportMissing) {
        // getPagesShouldBeInSitemap returns lower-cased URLs, so the sitemap side has to be
        // lower-cased too; otherwise any sitemap entry containing an upper-case character
        // would be reported as missing.
        const knownUrls = new Set(sitemapUrls.map((sitemapUrl) => sitemapUrl.toLowerCase()));
        missingInSitemap = getPagesShouldBeInSitemap(webUrl).filter((candidate) => !knownUrls.has(candidate));
    }

    const errorsSum = missingInSitemap.length + duplicates.length + errors.length;

    return { errors, duplicates, missingInSitemap, ok, skippedUrls, errorsSum };
}
|
|
570
|
+
|
|
495
571
|
/**
|
|
496
572
|
* @param sitemapUrl {string}
|
|
497
573
|
* @param skip {number}
|
|
498
574
|
* @param withNested {boolean}
|
|
499
575
|
* @param withImages {boolean}
|
|
576
|
+
* @param checkMissing {boolean}
|
|
500
577
|
* @param googleWebhookUrl {string|undefined}
|
|
501
578
|
* @return {Promise<*>}
|
|
502
579
|
*/
|
|
503
|
-
module.exports = async function run(sitemapUrl, skip, withNested, withImages, googleWebhookUrl) {
|
|
580
|
+
module.exports = async function run(sitemapUrl, skip, withNested, withImages, checkMissing, googleWebhookUrl) {
|
|
504
581
|
if (!sitemapUrl) {
|
|
505
582
|
stdout.write("⛔ Required parameter --url is empty.\n");
|
|
506
583
|
return process.exit(1);
|
|
@@ -509,56 +586,29 @@ module.exports = async function run(sitemapUrl, skip, withNested, withImages, go
|
|
|
509
586
|
const url = new URL(sitemapUrl);
|
|
510
587
|
const webUrl = url.origin;
|
|
511
588
|
|
|
512
|
-
|
|
513
|
-
|
|
514
|
-
|
|
515
|
-
stdout.write(`${createTabSpace()}Will test nested links\n`);
|
|
516
|
-
}
|
|
517
|
-
if (withImages) {
|
|
518
|
-
stdout.write(`${createTabSpace()}Will test images\n\n`);
|
|
519
|
-
}
|
|
589
|
+
const shouldReportMissing = checkMissing && withNested;
|
|
590
|
+
|
|
591
|
+
logInitialInfo(sitemapUrl, webUrl, withNested, withImages, checkMissing, shouldReportMissing);
|
|
520
592
|
|
|
521
593
|
const startTime = performance.now();
|
|
522
594
|
const sitemapUrls = await Sitemap.getSitemap(sitemapUrl);
|
|
523
595
|
await testSitemapUrls(sitemapUrls, webUrl, sitemapUrl, skip, withNested, withImages);
|
|
524
596
|
const finishTime = performance.now();
|
|
525
597
|
|
|
526
|
-
const
|
|
527
|
-
|
|
528
|
-
const errors = TESTED_URLS.filter((r) => r.status !== 200 && r.skipped === false);
|
|
529
|
-
const duplicates = [...new Set(sitemapUrls.filter((item, index, self) => self.indexOf(item) !== index))];
|
|
530
|
-
const skippedUrls = TESTED_URLS.filter((r) => r.status !== 200 && r.skipped === true);
|
|
531
|
-
const ok = TESTED_URLS.filter((r) => r.status === 200);
|
|
532
|
-
const missingInSitemap = shouldBeInSitemap.filter((testedUrl) => !sitemapUrls.includes(testedUrl));
|
|
533
|
-
|
|
534
|
-
if (missingInSitemap.length > 0 || duplicates.length > 0 || errors.length > 0 || ERRORS.length > 0) {
|
|
535
|
-
let chatMessage = "";
|
|
536
|
-
const duplicatesText = duplicates.map((url) => `${createTabSpace()}${url}`).join("\n");
|
|
537
|
-
const missingText = missingInSitemap.map((url) => `${createTabSpace()}${url}`).join("\n");
|
|
538
|
-
const errorText = createErrorResult(errors);
|
|
598
|
+
const result = getResult(webUrl, sitemapUrls, shouldReportMissing);
|
|
539
599
|
|
|
540
|
-
|
|
541
|
-
|
|
542
|
-
chatMessage += DUPLICATES_TITLE + duplicatesText;
|
|
543
|
-
}
|
|
544
|
-
if (missingText) {
|
|
545
|
-
logErrors(missingText, MISSING_TITLE);
|
|
546
|
-
chatMessage += MISSING_TITLE + missingText;
|
|
547
|
-
}
|
|
548
|
-
if (errorText) {
|
|
549
|
-
logErrors(errorText, ERROR_TITLE);
|
|
550
|
-
chatMessage += ERROR_TITLE + errorText;
|
|
551
|
-
}
|
|
600
|
+
if (result.errorsSum > 0) {
|
|
601
|
+
const chatMessage = logResultErrors(webUrl, result);
|
|
552
602
|
|
|
553
603
|
await sendGoogleChatMessage(chatMessage, googleWebhookUrl);
|
|
554
604
|
}
|
|
555
605
|
|
|
556
|
-
if (skippedUrls.length > 0) {
|
|
557
|
-
const skippedUrlsText = createSkippedResult(skippedUrls);
|
|
606
|
+
if (result.skippedUrls.length > 0) {
|
|
607
|
+
const skippedUrlsText = createSkippedResult(result.skippedUrls);
|
|
558
608
|
logErrors(skippedUrlsText, "\nSkipped origins:\n");
|
|
559
609
|
}
|
|
560
610
|
|
|
561
|
-
logStatistics(ok, Math.ceil(finishTime - startTime));
|
|
611
|
+
logStatistics(result.ok, Math.ceil(finishTime - startTime));
|
|
562
612
|
|
|
563
|
-
process.exit(
|
|
613
|
+
process.exit(result.errorsSum > 0 ? 1 : 0);
|
|
564
614
|
};
|