@uxf/scripts 11.62.0 → 11.62.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json
CHANGED
|
@@ -43,6 +43,11 @@ Environment variables:
|
|
|
43
43
|
type: "boolean",
|
|
44
44
|
group: "Options",
|
|
45
45
|
})
|
|
46
|
+
.option("check-missing", {
|
|
47
|
+
describe: "If indexable pages missing in sitemap should be reported. (--with-nested must be used)",
|
|
48
|
+
type: "boolean",
|
|
49
|
+
group: "Options",
|
|
50
|
+
})
|
|
46
51
|
.option("google-chat-webhook-url", {
|
|
47
52
|
describe: "Webhook URL of Google Chat where to send the results.",
|
|
48
53
|
type: "string",
|
|
@@ -70,6 +75,7 @@ Environment variables:
|
|
|
70
75
|
skip,
|
|
71
76
|
options["with-nested"],
|
|
72
77
|
options["with-images"],
|
|
78
|
+
options["check-missing"],
|
|
73
79
|
options["google-chat-webhook-url"],
|
|
74
80
|
);
|
|
75
81
|
} catch (e) {
|
|
@@ -9,12 +9,14 @@ const { HTTP_USERNAME, HTTP_PASSWORD } = process.env;
|
|
|
9
9
|
|
|
10
10
|
/**
|
|
11
11
|
* @typedef {{parentUrl: (string | undefined), isImg: boolean, isWebPage: boolean, ttl: number, url: string, status: number, message: (string | undefined), skipped: boolean, html: (string | null), redirected: boolean}} UrlCheckResponse
|
|
12
|
-
*/
|
|
13
|
-
|
|
14
|
-
/**
|
|
15
12
|
* @typedef {{url: string, parentUrl: (string | undefined), canonicalUrl: (string | null), isImg: boolean, isWebPage: boolean, ttl: number, status: number, message: (string | undefined), skipped: boolean, indexable: boolean, redirected: boolean}} TestedUrlDto
|
|
13
|
+
* @typedef {{errors: TestedUrlDto[], duplicates: unknown[], missingInSitemap: (*|*[]), ok: TestedUrlDto[], skippedUrls: TestedUrlDto[], errorsSum: number}} ResultDto
|
|
16
14
|
*/
|
|
17
15
|
|
|
16
|
+
const DUPLICATES_TITLE = "\n\n\nDuplicated pages in sitemap:\n";
|
|
17
|
+
const MISSING_TITLE = "\n\n\nMissing pages in sitemap:\n";
|
|
18
|
+
const ERROR_TITLE = "\n\n\nErrors:\n"
|
|
19
|
+
|
|
18
20
|
const MAX_TTL = 3;
|
|
19
21
|
const IMAGES_LABEL = "🏞 Images:";
|
|
20
22
|
const URLS_LABEL = "🔗 Links:";
|
|
@@ -24,7 +26,6 @@ const URLS_LABEL = "🔗 Links:";
|
|
|
24
26
|
*/
|
|
25
27
|
const TESTED_URLS = [];
|
|
26
28
|
const URLS_TO_CHECK = new Set();
|
|
27
|
-
const ERRORS = [];
|
|
28
29
|
|
|
29
30
|
const robotsParser = robotsTxtParser({ userAgent: "uxf-bot", allowOnNeutral: false });
|
|
30
31
|
|
|
@@ -75,7 +76,7 @@ function createErrorList(errors) {
|
|
|
75
76
|
}
|
|
76
77
|
|
|
77
78
|
/**
|
|
78
|
-
* @param errors {
|
|
79
|
+
* @param errors {TestedUrlDto[]}
|
|
79
80
|
* @return {string}
|
|
80
81
|
*/
|
|
81
82
|
function createErrorResult(errors) {
|
|
@@ -112,18 +113,11 @@ function createErrorResult(errors) {
|
|
|
112
113
|
}
|
|
113
114
|
}
|
|
114
115
|
|
|
115
|
-
if (ERRORS.length > 0) {
|
|
116
|
-
generalErrors = `\n\nGeneral errors:\n`;
|
|
117
|
-
for (const error of ERRORS) {
|
|
118
|
-
generalErrors += `${createTabSpace(1)}${error}\n`;
|
|
119
|
-
}
|
|
120
|
-
}
|
|
121
|
-
|
|
122
116
|
return parentPages + nestedPages + generalErrors;
|
|
123
117
|
}
|
|
124
118
|
|
|
125
119
|
/**
|
|
126
|
-
* @param skippedUrls {
|
|
120
|
+
* @param skippedUrls {TestedUrlDto[]}
|
|
127
121
|
* @return {string}
|
|
128
122
|
*/
|
|
129
123
|
function createSkippedResult(skippedUrls) {
|
|
@@ -340,20 +334,13 @@ async function testSitemapUrls(urls, webUrl, sitemapUrl, skip, withNested, withI
|
|
|
340
334
|
* @return {Promise<void>}
|
|
341
335
|
*/
|
|
342
336
|
async function testNestedUrls(html, parentUrl, parentIndex, webUrl) {
|
|
343
|
-
|
|
344
|
-
|
|
345
|
-
|
|
346
|
-
|
|
347
|
-
|
|
348
|
-
);
|
|
349
|
-
|
|
350
|
-
// FIXME
|
|
351
|
-
urls = urls.filter((url) => url.startsWith(webUrl) || url.startsWith("/"));
|
|
337
|
+
const $ = cheerio.load(html.toString());
|
|
338
|
+
let urls = createCorrectLinks(
|
|
339
|
+
$("a[href]").map((i, node) => $(node).attr("href")),
|
|
340
|
+
webUrl,
|
|
341
|
+
);
|
|
352
342
|
|
|
353
|
-
|
|
354
|
-
} catch (e) {
|
|
355
|
-
ERRORS.push(`Can't test all nested pages for ${parentUrl} - ${e.message}`);
|
|
356
|
-
}
|
|
343
|
+
await testNested(urls, parentIndex, parentUrl, createTabSpace() + URLS_LABEL, webUrl);
|
|
357
344
|
}
|
|
358
345
|
|
|
359
346
|
/**
|
|
@@ -364,17 +351,13 @@ async function testNestedUrls(html, parentUrl, parentIndex, webUrl) {
|
|
|
364
351
|
* @return {Promise<void>}
|
|
365
352
|
*/
|
|
366
353
|
async function testNestedImages(html, parentUrl, parentIndex, webUrl) {
|
|
367
|
-
|
|
368
|
-
|
|
369
|
-
|
|
370
|
-
|
|
371
|
-
|
|
372
|
-
);
|
|
354
|
+
const $ = cheerio.load(html);
|
|
355
|
+
const images = createCorrectLinks(
|
|
356
|
+
$("img[src]").map((i, node) => $(node).attr("src")),
|
|
357
|
+
webUrl,
|
|
358
|
+
);
|
|
373
359
|
|
|
374
|
-
|
|
375
|
-
} catch (e) {
|
|
376
|
-
ERRORS.push(`Can't test all nested images for ${parentUrl} - ${e.message}`);
|
|
377
|
-
}
|
|
360
|
+
await testNested(images, parentIndex, parentUrl, createTabSpace() + IMAGES_LABEL, webUrl);
|
|
378
361
|
}
|
|
379
362
|
|
|
380
363
|
/**
|
|
@@ -435,7 +418,7 @@ function convertTime(millis) {
|
|
|
435
418
|
}
|
|
436
419
|
|
|
437
420
|
/**
|
|
438
|
-
* @param okResults {
|
|
421
|
+
* @param okResults {TestedUrlDto[]}
|
|
439
422
|
* @param time {number}
|
|
440
423
|
*/
|
|
441
424
|
function logStatistics(okResults, time) {
|
|
@@ -483,6 +466,7 @@ function getPagesShouldBeInSitemap(webUrl) {
|
|
|
483
466
|
(dto) =>
|
|
484
467
|
(dto.canonicalUrl ?? dto.url).startsWith(webUrl) &&
|
|
485
468
|
!(dto.canonicalUrl ?? dto.url).includes("?") &&
|
|
469
|
+
!(dto.canonicalUrl ?? dto.url).includes("#") &&
|
|
486
470
|
dto.status === 200 &&
|
|
487
471
|
dto.ttl <= 1 &&
|
|
488
472
|
!dto.redirected &&
|
|
@@ -491,15 +475,89 @@ function getPagesShouldBeInSitemap(webUrl) {
|
|
|
491
475
|
.map((url) => (url.canonicalUrl ?? url.url).toLowerCase());
|
|
492
476
|
}
|
|
493
477
|
|
|
478
|
+
/**
|
|
479
|
+
* @param sitemapUrl {string}
|
|
480
|
+
* @param webUrl {string}
|
|
481
|
+
* @param withNested {boolean}
|
|
482
|
+
* @param withImages {boolean}
|
|
483
|
+
* @param checkMissing {boolean}
|
|
484
|
+
* @param shouldReportMissing {boolean}
|
|
485
|
+
* @return {void}
|
|
486
|
+
*/
|
|
487
|
+
function logInitialInfo(sitemapUrl, webUrl, withNested, withImages, checkMissing, shouldReportMissing) {
|
|
488
|
+
stdout.write(`${createTabSpace()}Sitemap url: ${sitemapUrl}\n`);
|
|
489
|
+
stdout.write(`${createTabSpace()}Web url: ${webUrl}\n\n`);
|
|
490
|
+
if (withNested) {
|
|
491
|
+
stdout.write(`${createTabSpace()}Will test nested links\n`);
|
|
492
|
+
}
|
|
493
|
+
if (withImages) {
|
|
494
|
+
stdout.write(`${createTabSpace()}Will test images\n\n`);
|
|
495
|
+
}
|
|
496
|
+
if (checkMissing) {
|
|
497
|
+
if (!shouldReportMissing) {
|
|
498
|
+
stdout.write(`${createTabSpace()}--check-missing option is only available with --with-nested option!\n`);
|
|
499
|
+
process.exit(1);
|
|
500
|
+
}
|
|
501
|
+
stdout.write(`${createTabSpace()}Will look for pages missing in sitemap\n\n`);
|
|
502
|
+
}
|
|
503
|
+
}
|
|
504
|
+
|
|
505
|
+
/**
|
|
506
|
+
* @param webUrl {string}
|
|
507
|
+
* @param result {ResultDto}
|
|
508
|
+
* @returns {string}
|
|
509
|
+
*/
|
|
510
|
+
function logResultErrors(webUrl, result) {
|
|
511
|
+
let chatMessage = `Result for ${webUrl}:\n\n`;
|
|
512
|
+
const duplicatesText = result.duplicates.map((url) => `${createTabSpace()}${url}`).join("\n");
|
|
513
|
+
const missingText = result.missingInSitemap.map((url) => `${createTabSpace()}${url}`).join("\n");
|
|
514
|
+
const errorText = createErrorResult(result.errors);
|
|
515
|
+
|
|
516
|
+
if (duplicatesText) {
|
|
517
|
+
logErrors(duplicatesText, DUPLICATES_TITLE);
|
|
518
|
+
chatMessage += DUPLICATES_TITLE + duplicatesText;
|
|
519
|
+
}
|
|
520
|
+
if (missingText) {
|
|
521
|
+
logErrors(missingText, MISSING_TITLE);
|
|
522
|
+
chatMessage += MISSING_TITLE + missingText;
|
|
523
|
+
}
|
|
524
|
+
if (errorText) {
|
|
525
|
+
logErrors(errorText, ERROR_TITLE);
|
|
526
|
+
chatMessage += ERROR_TITLE + errorText;
|
|
527
|
+
}
|
|
528
|
+
|
|
529
|
+
return chatMessage;
|
|
530
|
+
}
|
|
531
|
+
|
|
532
|
+
/**
|
|
533
|
+
* @param webUrl {string}
|
|
534
|
+
* @param sitemapUrls {string[]}
|
|
535
|
+
* @param shouldReportMissing {boolean}
|
|
536
|
+
* @returns {ResultDto}
|
|
537
|
+
*/
|
|
538
|
+
function getResult(webUrl, sitemapUrls, shouldReportMissing) {
|
|
539
|
+
const shouldBeInSitemap = getPagesShouldBeInSitemap(webUrl);
|
|
540
|
+
const errors = TESTED_URLS.filter((r) => r.status !== 200 && r.skipped === false);
|
|
541
|
+
const duplicates = [...new Set(sitemapUrls.filter((item, index, self) => self.indexOf(item) !== index))];
|
|
542
|
+
const missingInSitemap = shouldReportMissing ? shouldBeInSitemap.filter((testedUrl) => !sitemapUrls.includes(testedUrl)) : [];
|
|
543
|
+
const ok = TESTED_URLS.filter((r) => r.status === 200);
|
|
544
|
+
const skippedUrls = TESTED_URLS.filter((r) => r.status !== 200 && r.skipped === true);
|
|
545
|
+
|
|
546
|
+
const errorsSum = missingInSitemap.length + duplicates.length + errors.length;
|
|
547
|
+
|
|
548
|
+
return {errors, duplicates, missingInSitemap, ok, skippedUrls, errorsSum};
|
|
549
|
+
}
|
|
550
|
+
|
|
494
551
|
/**
|
|
495
552
|
* @param sitemapUrl {string}
|
|
496
553
|
* @param skip {number}
|
|
497
554
|
* @param withNested {boolean}
|
|
498
555
|
* @param withImages {boolean}
|
|
556
|
+
* @param checkMissing {boolean}
|
|
499
557
|
* @param googleWebhookUrl {string|undefined}
|
|
500
558
|
* @return {Promise<*>}
|
|
501
559
|
*/
|
|
502
|
-
module.exports = async function run(sitemapUrl, skip, withNested, withImages, googleWebhookUrl) {
|
|
560
|
+
module.exports = async function run(sitemapUrl, skip, withNested, withImages, checkMissing, googleWebhookUrl) {
|
|
503
561
|
if (!sitemapUrl) {
|
|
504
562
|
stdout.write("⛔ Required parameter --url is empty.\n");
|
|
505
563
|
return process.exit(1);
|
|
@@ -508,50 +566,29 @@ module.exports = async function run(sitemapUrl, skip, withNested, withImages, go
|
|
|
508
566
|
const url = new URL(sitemapUrl);
|
|
509
567
|
const webUrl = url.origin;
|
|
510
568
|
|
|
511
|
-
|
|
512
|
-
|
|
513
|
-
|
|
514
|
-
stdout.write(`${createTabSpace()}Will test nested links\n`);
|
|
515
|
-
}
|
|
516
|
-
if (withImages) {
|
|
517
|
-
stdout.write(`${createTabSpace()}Will test images\n\n`);
|
|
518
|
-
}
|
|
569
|
+
const shouldReportMissing = checkMissing && withNested;
|
|
570
|
+
|
|
571
|
+
logInitialInfo(sitemapUrl, webUrl, withNested, withImages, checkMissing, shouldReportMissing);
|
|
519
572
|
|
|
520
573
|
const startTime = performance.now();
|
|
521
574
|
const sitemapUrls = await Sitemap.getSitemap(sitemapUrl);
|
|
522
575
|
await testSitemapUrls(sitemapUrls, webUrl, sitemapUrl, skip, withNested, withImages);
|
|
523
576
|
const finishTime = performance.now();
|
|
524
577
|
|
|
525
|
-
const
|
|
578
|
+
const result = getResult(webUrl, sitemapUrls, shouldReportMissing);
|
|
526
579
|
|
|
527
|
-
|
|
528
|
-
|
|
529
|
-
|
|
530
|
-
|
|
531
|
-
const missingInSitemap = shouldBeInSitemap.filter((testedUrl) => !sitemapUrls.includes(testedUrl));
|
|
532
|
-
|
|
533
|
-
if (missingInSitemap.length > 0 || duplicates.length > 0 || errors.length > 0 || ERRORS.length > 0) {
|
|
534
|
-
const duplicatesText = duplicates.map((url) => `${createTabSpace()}${url}`).join("\n");
|
|
535
|
-
const missingText = missingInSitemap.map((url) => `${createTabSpace()}${url}`).join("\n");
|
|
536
|
-
const errorText = createErrorResult(errors);
|
|
537
|
-
if (duplicatesText) {
|
|
538
|
-
logErrors(duplicatesText, "\n\n\nDuplicated pages in sitemap:\n");
|
|
539
|
-
}
|
|
540
|
-
if (missingText) {
|
|
541
|
-
logErrors(missingText, "\n\n\nMissing pages in sitemap:\n");
|
|
542
|
-
}
|
|
543
|
-
if (errorText) {
|
|
544
|
-
logErrors(errorText, "\n\n\nErrors:\n");
|
|
545
|
-
}
|
|
546
|
-
await sendGoogleChatMessage(duplicatesText + missingText + errorText, googleWebhookUrl);
|
|
580
|
+
if (result.errorsSum > 0) {
|
|
581
|
+
const chatMessage = logResultErrors(webUrl, result);
|
|
582
|
+
|
|
583
|
+
await sendGoogleChatMessage(chatMessage, googleWebhookUrl);
|
|
547
584
|
}
|
|
548
585
|
|
|
549
|
-
if (skippedUrls.length > 0) {
|
|
550
|
-
const skippedUrlsText = createSkippedResult(skippedUrls);
|
|
586
|
+
if (result.skippedUrls.length > 0) {
|
|
587
|
+
const skippedUrlsText = createSkippedResult(result.skippedUrls);
|
|
551
588
|
logErrors(skippedUrlsText, "\nSkipped origins:\n");
|
|
552
589
|
}
|
|
553
590
|
|
|
554
|
-
logStatistics(ok, Math.ceil(finishTime - startTime));
|
|
591
|
+
logStatistics(result.ok, Math.ceil(finishTime - startTime));
|
|
555
592
|
|
|
556
|
-
process.exit(
|
|
593
|
+
process.exit(result.errorsSum > 0 ? 1 : 0);
|
|
557
594
|
};
|