@uxf/scripts 11.62.1 → 11.62.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json
CHANGED
|
@@ -43,6 +43,11 @@ Environment variables:
|
|
|
43
43
|
type: "boolean",
|
|
44
44
|
group: "Options",
|
|
45
45
|
})
|
|
46
|
+
.option("check-missing", {
|
|
47
|
+
describe: "If indexable pages missing in sitemap should be reported. (--with-nested must be used)",
|
|
48
|
+
type: "boolean",
|
|
49
|
+
group: "Options",
|
|
50
|
+
})
|
|
46
51
|
.option("google-chat-webhook-url", {
|
|
47
52
|
describe: "Webhook URL of Google Chat where to send the results.",
|
|
48
53
|
type: "string",
|
|
@@ -70,6 +75,7 @@ Environment variables:
|
|
|
70
75
|
skip,
|
|
71
76
|
options["with-nested"],
|
|
72
77
|
options["with-images"],
|
|
78
|
+
options["check-missing"],
|
|
73
79
|
options["google-chat-webhook-url"],
|
|
74
80
|
);
|
|
75
81
|
} catch (e) {
|
|
@@ -9,10 +9,8 @@ const { HTTP_USERNAME, HTTP_PASSWORD } = process.env;
|
|
|
9
9
|
|
|
10
10
|
/**
|
|
11
11
|
* @typedef {{parentUrl: (string | undefined), isImg: boolean, isWebPage: boolean, ttl: number, url: string, status: number, message: (string | undefined), skipped: boolean, html: (string | null), redirected: boolean}} UrlCheckResponse
|
|
12
|
-
*/
|
|
13
|
-
|
|
14
|
-
/**
|
|
15
12
|
* @typedef {{url: string, parentUrl: (string | undefined), canonicalUrl: (string | null), isImg: boolean, isWebPage: boolean, ttl: number, status: number, message: (string | undefined), skipped: boolean, indexable: boolean, redirected: boolean}} TestedUrlDto
|
|
13
|
+
* @typedef {{errors: TestedUrlDto[], duplicates: string[], missingInSitemap: string[], ok: TestedUrlDto[], skippedUrls: TestedUrlDto[], errorsSum: number}} ResultDto
|
|
16
14
|
*/
|
|
17
15
|
|
|
18
16
|
const DUPLICATES_TITLE = "\n\n\nDuplicated pages in sitemap:\n";
|
|
@@ -28,7 +26,6 @@ const URLS_LABEL = "🔗 Links:";
|
|
|
28
26
|
*/
|
|
29
27
|
const TESTED_URLS = [];
|
|
30
28
|
const URLS_TO_CHECK = new Set();
|
|
31
|
-
const ERRORS = [];
|
|
32
29
|
|
|
33
30
|
const robotsParser = robotsTxtParser({ userAgent: "uxf-bot", allowOnNeutral: false });
|
|
34
31
|
|
|
@@ -116,13 +113,6 @@ function createErrorResult(errors) {
|
|
|
116
113
|
}
|
|
117
114
|
}
|
|
118
115
|
|
|
119
|
-
if (ERRORS.length > 0) {
|
|
120
|
-
generalErrors = `\n\nGeneral errors:\n`;
|
|
121
|
-
for (const error of ERRORS) {
|
|
122
|
-
generalErrors += `${createTabSpace(1)}${error}\n`;
|
|
123
|
-
}
|
|
124
|
-
}
|
|
125
|
-
|
|
126
116
|
return parentPages + nestedPages + generalErrors;
|
|
127
117
|
}
|
|
128
118
|
|
|
@@ -344,17 +334,13 @@ async function testSitemapUrls(urls, webUrl, sitemapUrl, skip, withNested, withI
|
|
|
344
334
|
* @return {Promise<void>}
|
|
345
335
|
*/
|
|
346
336
|
async function testNestedUrls(html, parentUrl, parentIndex, webUrl) {
|
|
347
|
-
|
|
348
|
-
|
|
349
|
-
|
|
350
|
-
|
|
351
|
-
|
|
352
|
-
);
|
|
337
|
+
const $ = cheerio.load(html.toString());
|
|
338
|
+
let urls = createCorrectLinks(
|
|
339
|
+
$("a[href]").map((i, node) => $(node).attr("href")),
|
|
340
|
+
webUrl,
|
|
341
|
+
);
|
|
353
342
|
|
|
354
|
-
|
|
355
|
-
} catch (e) {
|
|
356
|
-
ERRORS.push(`Can't test all nested pages for ${parentUrl} - ${e.message}`);
|
|
357
|
-
}
|
|
343
|
+
await testNested(urls, parentIndex, parentUrl, createTabSpace() + URLS_LABEL, webUrl);
|
|
358
344
|
}
|
|
359
345
|
|
|
360
346
|
/**
|
|
@@ -365,17 +351,13 @@ async function testNestedUrls(html, parentUrl, parentIndex, webUrl) {
|
|
|
365
351
|
* @return {Promise<void>}
|
|
366
352
|
*/
|
|
367
353
|
async function testNestedImages(html, parentUrl, parentIndex, webUrl) {
|
|
368
|
-
|
|
369
|
-
|
|
370
|
-
|
|
371
|
-
|
|
372
|
-
|
|
373
|
-
);
|
|
354
|
+
const $ = cheerio.load(html);
|
|
355
|
+
const images = createCorrectLinks(
|
|
356
|
+
$("img[src]").map((i, node) => $(node).attr("src")),
|
|
357
|
+
webUrl,
|
|
358
|
+
);
|
|
374
359
|
|
|
375
|
-
|
|
376
|
-
} catch (e) {
|
|
377
|
-
ERRORS.push(`Can't test all nested images for ${parentUrl} - ${e.message}`);
|
|
378
|
-
}
|
|
360
|
+
await testNested(images, parentIndex, parentUrl, createTabSpace() + IMAGES_LABEL, webUrl);
|
|
379
361
|
}
|
|
380
362
|
|
|
381
363
|
/**
|
|
@@ -436,7 +418,7 @@ function convertTime(millis) {
|
|
|
436
418
|
}
|
|
437
419
|
|
|
438
420
|
/**
|
|
439
|
-
* @param okResults {
|
|
421
|
+
* @param okResults {TestedUrlDto[]}
|
|
440
422
|
* @param time {number}
|
|
441
423
|
*/
|
|
442
424
|
function logStatistics(okResults, time) {
|
|
@@ -484,6 +466,7 @@ function getPagesShouldBeInSitemap(webUrl) {
|
|
|
484
466
|
(dto) =>
|
|
485
467
|
(dto.canonicalUrl ?? dto.url).startsWith(webUrl) &&
|
|
486
468
|
!(dto.canonicalUrl ?? dto.url).includes("?") &&
|
|
469
|
+
!(dto.canonicalUrl ?? dto.url).includes("#") &&
|
|
487
470
|
dto.status === 200 &&
|
|
488
471
|
dto.ttl <= 1 &&
|
|
489
472
|
!dto.redirected &&
|
|
@@ -492,15 +475,89 @@ function getPagesShouldBeInSitemap(webUrl) {
|
|
|
492
475
|
.map((url) => (url.canonicalUrl ?? url.url).toLowerCase());
|
|
493
476
|
}
|
|
494
477
|
|
|
478
|
+
/**
 * Prints the run configuration to stdout before testing starts.
 *
 * Always writes the sitemap URL and the web URL, then one line for each
 * enabled feature. When checkMissing is set but shouldReportMissing is not,
 * an explanatory message is written and process.exit(1) is called.
 *
 * @param sitemapUrl {string}
 * @param webUrl {string}
 * @param withNested {boolean}
 * @param withImages {boolean}
 * @param checkMissing {boolean}
 * @param shouldReportMissing {boolean}
 * @return {void}
 */
function logInitialInfo(sitemapUrl, webUrl, withNested, withImages, checkMissing, shouldReportMissing) {
    // Every line of the banner is prefixed with the default tab space.
    const print = (text) => stdout.write(`${createTabSpace()}${text}`);

    print(`Sitemap url: ${sitemapUrl}\n`);
    print(`Web url: ${webUrl}\n\n`);

    if (withNested) {
        print("Will test nested links\n");
    }
    if (withImages) {
        print("Will test images\n\n");
    }

    if (!checkMissing) {
        return;
    }

    if (!shouldReportMissing) {
        print("--check-missing option is only available with --with-nested option!\n");
        process.exit(1);
    }
    print("Will look for pages missing in sitemap\n\n");
}
|
|
504
|
+
|
|
505
|
+
/**
 * Logs every non-empty problem section of the result and builds the chat message.
 *
 * Sections are handled in a fixed order: duplicates, pages missing in the
 * sitemap, and URL errors. Each non-empty section is passed to logErrors and
 * appended (title followed by text) to the returned message.
 *
 * @param webUrl {string}
 * @param result {ResultDto}
 * @returns {string} message starting with "Result for <webUrl>:" followed by the reported sections
 */
function logResultErrors(webUrl, result) {
    const indentEach = (urls) => urls.map((url) => `${createTabSpace()}${url}`).join("\n");

    // All section texts are prepared up front, before anything is logged.
    const sections = [
        [DUPLICATES_TITLE, indentEach(result.duplicates)],
        [MISSING_TITLE, indentEach(result.missingInSitemap)],
        [ERROR_TITLE, createErrorResult(result.errors)],
    ];

    let chatMessage = `Result for ${webUrl}:\n\n`;
    for (const [title, text] of sections) {
        if (!text) {
            continue;
        }
        logErrors(text, title);
        chatMessage += title + text;
    }

    return chatMessage;
}
|
|
531
|
+
|
|
532
|
+
/**
 * Builds the final report from the collected TESTED_URLS and the sitemap contents.
 *
 * @param webUrl {string} passed to getPagesShouldBeInSitemap to select candidate pages
 * @param sitemapUrls {string[]} URLs listed in the sitemap (may contain repeats)
 * @param shouldReportMissing {boolean} when false, missingInSitemap is always empty
 * @returns {ResultDto} errorsSum is the count of missing pages, duplicates and errors
 */
function getResult(webUrl, sitemapUrls, shouldReportMissing) {
    const shouldBeInSitemap = getPagesShouldBeInSitemap(webUrl);

    // Partition the tested URLs in one pass. Entries with a non-200 status whose
    // `skipped` flag is neither true nor false land in no bucket, as before.
    const errors = [];
    const ok = [];
    const skippedUrls = [];
    for (const tested of TESTED_URLS) {
        if (tested.status === 200) {
            ok.push(tested);
        } else if (tested.skipped === false) {
            errors.push(tested);
        } else if (tested.skipped === true) {
            skippedUrls.push(tested);
        }
    }

    // Collect URLs seen more than once, in order of their first repeat.
    // Set lookups avoid the quadratic indexOf-inside-filter scan.
    const seen = new Set();
    const repeated = new Set();
    for (const url of sitemapUrls) {
        if (seen.has(url)) {
            repeated.add(url);
        } else {
            seen.add(url);
        }
    }
    const duplicates = [...repeated];

    let missingInSitemap = [];
    if (shouldReportMissing) {
        // Candidate pages arrive lower-cased, so the sitemap side is normalised the
        // same way; otherwise an entry with an upper-case letter is reported missing.
        const sitemapLookup = new Set(sitemapUrls.map((url) => url.toLowerCase()));
        missingInSitemap = shouldBeInSitemap.filter((testedUrl) => !sitemapLookup.has(testedUrl));
    }

    const errorsSum = missingInSitemap.length + duplicates.length + errors.length;

    return { errors, duplicates, missingInSitemap, ok, skippedUrls, errorsSum };
}
|
|
550
|
+
|
|
495
551
|
/**
|
|
496
552
|
* @param sitemapUrl {string}
|
|
497
553
|
* @param skip {number}
|
|
498
554
|
* @param withNested {boolean}
|
|
499
555
|
* @param withImages {boolean}
|
|
556
|
+
* @param checkMissing {boolean}
|
|
500
557
|
* @param googleWebhookUrl {string|undefined}
|
|
501
558
|
* @return {Promise<*>}
|
|
502
559
|
*/
|
|
503
|
-
module.exports = async function run(sitemapUrl, skip, withNested, withImages, googleWebhookUrl) {
|
|
560
|
+
module.exports = async function run(sitemapUrl, skip, withNested, withImages, checkMissing, googleWebhookUrl) {
|
|
504
561
|
if (!sitemapUrl) {
|
|
505
562
|
stdout.write("⛔ Required parameter --url is empty.\n");
|
|
506
563
|
return process.exit(1);
|
|
@@ -509,56 +566,29 @@ module.exports = async function run(sitemapUrl, skip, withNested, withImages, go
|
|
|
509
566
|
const url = new URL(sitemapUrl);
|
|
510
567
|
const webUrl = url.origin;
|
|
511
568
|
|
|
512
|
-
|
|
513
|
-
|
|
514
|
-
|
|
515
|
-
stdout.write(`${createTabSpace()}Will test nested links\n`);
|
|
516
|
-
}
|
|
517
|
-
if (withImages) {
|
|
518
|
-
stdout.write(`${createTabSpace()}Will test images\n\n`);
|
|
519
|
-
}
|
|
569
|
+
const shouldReportMissing = checkMissing && withNested;
|
|
570
|
+
|
|
571
|
+
logInitialInfo(sitemapUrl, webUrl, withNested, withImages, checkMissing, shouldReportMissing);
|
|
520
572
|
|
|
521
573
|
const startTime = performance.now();
|
|
522
574
|
const sitemapUrls = await Sitemap.getSitemap(sitemapUrl);
|
|
523
575
|
await testSitemapUrls(sitemapUrls, webUrl, sitemapUrl, skip, withNested, withImages);
|
|
524
576
|
const finishTime = performance.now();
|
|
525
577
|
|
|
526
|
-
const
|
|
527
|
-
|
|
528
|
-
const errors = TESTED_URLS.filter((r) => r.status !== 200 && r.skipped === false);
|
|
529
|
-
const duplicates = [...new Set(sitemapUrls.filter((item, index, self) => self.indexOf(item) !== index))];
|
|
530
|
-
const skippedUrls = TESTED_URLS.filter((r) => r.status !== 200 && r.skipped === true);
|
|
531
|
-
const ok = TESTED_URLS.filter((r) => r.status === 200);
|
|
532
|
-
const missingInSitemap = shouldBeInSitemap.filter((testedUrl) => !sitemapUrls.includes(testedUrl));
|
|
578
|
+
const result = getResult(webUrl, sitemapUrls, shouldReportMissing);
|
|
533
579
|
|
|
534
|
-
if (
|
|
535
|
-
|
|
536
|
-
const duplicatesText = duplicates.map((url) => `${createTabSpace()}${url}`).join("\n");
|
|
537
|
-
const missingText = missingInSitemap.map((url) => `${createTabSpace()}${url}`).join("\n");
|
|
538
|
-
const errorText = createErrorResult(errors);
|
|
539
|
-
|
|
540
|
-
if (duplicatesText) {
|
|
541
|
-
logErrors(duplicatesText, DUPLICATES_TITLE);
|
|
542
|
-
chatMessage += DUPLICATES_TITLE + duplicatesText;
|
|
543
|
-
}
|
|
544
|
-
if (missingText) {
|
|
545
|
-
logErrors(missingText, MISSING_TITLE);
|
|
546
|
-
chatMessage += MISSING_TITLE + missingText;
|
|
547
|
-
}
|
|
548
|
-
if (errorText) {
|
|
549
|
-
logErrors(errorText, ERROR_TITLE);
|
|
550
|
-
chatMessage += ERROR_TITLE + errorText;
|
|
551
|
-
}
|
|
580
|
+
if (result.errorsSum > 0) {
|
|
581
|
+
const chatMessage = logResultErrors(webUrl, result);
|
|
552
582
|
|
|
553
583
|
await sendGoogleChatMessage(chatMessage, googleWebhookUrl);
|
|
554
584
|
}
|
|
555
585
|
|
|
556
|
-
if (skippedUrls.length > 0) {
|
|
557
|
-
const skippedUrlsText = createSkippedResult(skippedUrls);
|
|
586
|
+
if (result.skippedUrls.length > 0) {
|
|
587
|
+
const skippedUrlsText = createSkippedResult(result.skippedUrls);
|
|
558
588
|
logErrors(skippedUrlsText, "\nSkipped origins:\n");
|
|
559
589
|
}
|
|
560
590
|
|
|
561
|
-
logStatistics(ok, Math.ceil(finishTime - startTime));
|
|
591
|
+
logStatistics(result.ok, Math.ceil(finishTime - startTime));
|
|
562
592
|
|
|
563
|
-
process.exit(
|
|
593
|
+
process.exit(result.errorsSum > 0 ? 1 : 0);
|
|
564
594
|
};
|