@uxf/scripts 11.62.0 → 11.62.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@uxf/scripts",
3
- "version": "11.62.0",
3
+ "version": "11.62.2",
4
4
  "description": "",
5
5
  "main": "index.js",
6
6
  "bin": {
@@ -43,6 +43,11 @@ Environment variables:
43
43
  type: "boolean",
44
44
  group: "Options",
45
45
  })
46
+ .option("check-missing", {
47
+ describe: "If indexable pages missing in sitemap should be reported. (--with-nested must be used)",
48
+ type: "boolean",
49
+ group: "Options",
50
+ })
46
51
  .option("google-chat-webhook-url", {
47
52
  describe: "Webhook URL of Google Chat where to send the results.",
48
53
  type: "string",
@@ -70,6 +75,7 @@ Environment variables:
70
75
  skip,
71
76
  options["with-nested"],
72
77
  options["with-images"],
78
+ options["check-missing"],
73
79
  options["google-chat-webhook-url"],
74
80
  );
75
81
  } catch (e) {
@@ -9,12 +9,14 @@ const { HTTP_USERNAME, HTTP_PASSWORD } = process.env;
9
9
 
10
10
  /**
11
11
  * @typedef {{parentUrl: (string | undefined), isImg: boolean, isWebPage: boolean, ttl: number, url: string, status: number, message: (string | undefined), skipped: boolean, html: (string | null), redirected: boolean}} UrlCheckResponse
12
- */
13
-
14
- /**
15
12
  * @typedef {{url: string, parentUrl: (string | undefined), canonicalUrl: (string | null), isImg: boolean, isWebPage: boolean, ttl: number, status: number, message: (string | undefined), skipped: boolean, indexable: boolean, redirected: boolean}} TestedUrlDto
13
+ * @typedef {{errors: TestedUrlDto[], duplicates: unknown[], missingInSitemap: (*|*[]), ok: TestedUrlDto[], skippedUrls: TestedUrlDto[], errorsSum: number}} ResultDto
16
14
  */
17
15
 
16
+ const DUPLICATES_TITLE = "\n\n\nDuplicated pages in sitemap:\n";
17
+ const MISSING_TITLE = "\n\n\nMissing pages in sitemap:\n";
18
+ const ERROR_TITLE = "\n\n\nErrors:\n"
19
+
18
20
  const MAX_TTL = 3;
19
21
  const IMAGES_LABEL = "🏞 Images:";
20
22
  const URLS_LABEL = "🔗 Links:";
@@ -24,7 +26,6 @@ const URLS_LABEL = "🔗 Links:";
24
26
  */
25
27
  const TESTED_URLS = [];
26
28
  const URLS_TO_CHECK = new Set();
27
- const ERRORS = [];
28
29
 
29
30
  const robotsParser = robotsTxtParser({ userAgent: "uxf-bot", allowOnNeutral: false });
30
31
 
@@ -75,7 +76,7 @@ function createErrorList(errors) {
75
76
  }
76
77
 
77
78
  /**
78
- * @param errors {UrlCheckResponse[]}
79
+ * @param errors {TestedUrlDto[]}
79
80
  * @return {string}
80
81
  */
81
82
  function createErrorResult(errors) {
@@ -112,18 +113,11 @@ function createErrorResult(errors) {
112
113
  }
113
114
  }
114
115
 
115
- if (ERRORS.length > 0) {
116
- generalErrors = `\n\nGeneral errors:\n`;
117
- for (const error of ERRORS) {
118
- generalErrors += `${createTabSpace(1)}${error}\n`;
119
- }
120
- }
121
-
122
116
  return parentPages + nestedPages + generalErrors;
123
117
  }
124
118
 
125
119
  /**
126
- * @param skippedUrls {UrlCheckResponse[]}
120
+ * @param skippedUrls {TestedUrlDto[]}
127
121
  * @return {string}
128
122
  */
129
123
  function createSkippedResult(skippedUrls) {
@@ -340,20 +334,13 @@ async function testSitemapUrls(urls, webUrl, sitemapUrl, skip, withNested, withI
340
334
  * @return {Promise<void>}
341
335
  */
342
336
  async function testNestedUrls(html, parentUrl, parentIndex, webUrl) {
343
- try {
344
- const $ = cheerio.load(html.toString());
345
- let urls = createCorrectLinks(
346
- $("a[href]").map((i, node) => $(node).attr("href")),
347
- webUrl,
348
- );
349
-
350
- // FIXME
351
- urls = urls.filter((url) => url.startsWith(webUrl) || url.startsWith("/"));
337
+ const $ = cheerio.load(html.toString());
338
+ let urls = createCorrectLinks(
339
+ $("a[href]").map((i, node) => $(node).attr("href")),
340
+ webUrl,
341
+ );
352
342
 
353
- await testNested(urls, parentIndex, parentUrl, createTabSpace() + URLS_LABEL, webUrl);
354
- } catch (e) {
355
- ERRORS.push(`Can't test all nested pages for ${parentUrl} - ${e.message}`);
356
- }
343
+ await testNested(urls, parentIndex, parentUrl, createTabSpace() + URLS_LABEL, webUrl);
357
344
  }
358
345
 
359
346
  /**
@@ -364,17 +351,13 @@ async function testNestedUrls(html, parentUrl, parentIndex, webUrl) {
364
351
  * @return {Promise<void>}
365
352
  */
366
353
  async function testNestedImages(html, parentUrl, parentIndex, webUrl) {
367
- try {
368
- const $ = cheerio.load(html);
369
- const images = createCorrectLinks(
370
- $("img[src]").map((i, node) => $(node).attr("src")),
371
- webUrl,
372
- );
354
+ const $ = cheerio.load(html);
355
+ const images = createCorrectLinks(
356
+ $("img[src]").map((i, node) => $(node).attr("src")),
357
+ webUrl,
358
+ );
373
359
 
374
- await testNested(images, parentIndex, parentUrl, createTabSpace() + IMAGES_LABEL, webUrl);
375
- } catch (e) {
376
- ERRORS.push(`Can't test all nested images for ${parentUrl} - ${e.message}`);
377
- }
360
+ await testNested(images, parentIndex, parentUrl, createTabSpace() + IMAGES_LABEL, webUrl);
378
361
  }
379
362
 
380
363
  /**
@@ -435,7 +418,7 @@ function convertTime(millis) {
435
418
  }
436
419
 
437
420
  /**
438
- * @param okResults {UrlCheckResponse[]}
421
+ * @param okResults {TestedUrlDto[]}
439
422
  * @param time {number}
440
423
  */
441
424
  function logStatistics(okResults, time) {
@@ -483,6 +466,7 @@ function getPagesShouldBeInSitemap(webUrl) {
483
466
  (dto) =>
484
467
  (dto.canonicalUrl ?? dto.url).startsWith(webUrl) &&
485
468
  !(dto.canonicalUrl ?? dto.url).includes("?") &&
469
+ !(dto.canonicalUrl ?? dto.url).includes("#") &&
486
470
  dto.status === 200 &&
487
471
  dto.ttl <= 1 &&
488
472
  !dto.redirected &&
@@ -491,15 +475,89 @@ function getPagesShouldBeInSitemap(webUrl) {
491
475
  .map((url) => (url.canonicalUrl ?? url.url).toLowerCase());
492
476
  }
493
477
 
478
+ /**
479
+ * @param sitemapUrl {string}
480
+ * @param webUrl {string}
481
+ * @param withNested {boolean}
482
+ * @param withImages {boolean}
483
+ * @param checkMissing {boolean}
484
+ * @param shouldReportMissing {boolean}
485
+ * @return {void}
486
+ */
487
+ function logInitialInfo(sitemapUrl, webUrl, withNested, withImages, checkMissing, shouldReportMissing) {
488
+ stdout.write(`${createTabSpace()}Sitemap url: ${sitemapUrl}\n`);
489
+ stdout.write(`${createTabSpace()}Web url: ${webUrl}\n\n`);
490
+ if (withNested) {
491
+ stdout.write(`${createTabSpace()}Will test nested links\n`);
492
+ }
493
+ if (withImages) {
494
+ stdout.write(`${createTabSpace()}Will test images\n\n`);
495
+ }
496
+ if (checkMissing) {
497
+ if (!shouldReportMissing) {
498
+ stdout.write(`${createTabSpace()}--check-missing option is only available with --with-nested option!\n`);
499
+ process.exit(1);
500
+ }
501
+ stdout.write(`${createTabSpace()}Will look for pages missing in sitemap\n\n`);
502
+ }
503
+ }
504
+
505
+ /**
506
+ * @param webUrl {string}
507
+ * @param result {ResultDto}
508
+ * @returns {string}
509
+ */
510
+ function logResultErrors(webUrl, result) {
511
+ let chatMessage = `Result for ${webUrl}:\n\n`;
512
+ const duplicatesText = result.duplicates.map((url) => `${createTabSpace()}${url}`).join("\n");
513
+ const missingText = result.missingInSitemap.map((url) => `${createTabSpace()}${url}`).join("\n");
514
+ const errorText = createErrorResult(result.errors);
515
+
516
+ if (duplicatesText) {
517
+ logErrors(duplicatesText, DUPLICATES_TITLE);
518
+ chatMessage += DUPLICATES_TITLE + duplicatesText;
519
+ }
520
+ if (missingText) {
521
+ logErrors(missingText, MISSING_TITLE);
522
+ chatMessage += MISSING_TITLE + missingText;
523
+ }
524
+ if (errorText) {
525
+ logErrors(errorText, ERROR_TITLE);
526
+ chatMessage += ERROR_TITLE + errorText;
527
+ }
528
+
529
+ return chatMessage;
530
+ }
531
+
532
+ /**
533
+ * @param webUrl {string}
534
+ * @param sitemapUrls {string[]}
535
+ * @param shouldReportMissing {boolean}
536
+ * @returns {ResultDto}
537
+ */
538
+ function getResult(webUrl, sitemapUrls, shouldReportMissing) {
539
+ const shouldBeInSitemap = getPagesShouldBeInSitemap(webUrl);
540
+ const errors = TESTED_URLS.filter((r) => r.status !== 200 && r.skipped === false);
541
+ const duplicates = [...new Set(sitemapUrls.filter((item, index, self) => self.indexOf(item) !== index))];
542
+ const missingInSitemap = shouldReportMissing ? shouldBeInSitemap.filter((testedUrl) => !sitemapUrls.includes(testedUrl)) : [];
543
+ const ok = TESTED_URLS.filter((r) => r.status === 200);
544
+ const skippedUrls = TESTED_URLS.filter((r) => r.status !== 200 && r.skipped === true);
545
+
546
+ const errorsSum = missingInSitemap.length + duplicates.length + errors.length;
547
+
548
+ return {errors, duplicates, missingInSitemap, ok, skippedUrls, errorsSum};
549
+ }
550
+
494
551
  /**
495
552
  * @param sitemapUrl {string}
496
553
  * @param skip {number}
497
554
  * @param withNested {boolean}
498
555
  * @param withImages {boolean}
556
+ * @param checkMissing {boolean}
499
557
  * @param googleWebhookUrl {string|undefined}
500
558
  * @return {Promise<*>}
501
559
  */
502
- module.exports = async function run(sitemapUrl, skip, withNested, withImages, googleWebhookUrl) {
560
+ module.exports = async function run(sitemapUrl, skip, withNested, withImages, checkMissing, googleWebhookUrl) {
503
561
  if (!sitemapUrl) {
504
562
  stdout.write("⛔ Required parameter --url is empty.\n");
505
563
  return process.exit(1);
@@ -508,50 +566,29 @@ module.exports = async function run(sitemapUrl, skip, withNested, withImages, go
508
566
  const url = new URL(sitemapUrl);
509
567
  const webUrl = url.origin;
510
568
 
511
- stdout.write(`${createTabSpace()}Sitemap url: ${sitemapUrl}\n`);
512
- stdout.write(`${createTabSpace()}Web url: ${webUrl}\n\n`);
513
- if (withNested) {
514
- stdout.write(`${createTabSpace()}Will test nested links\n`);
515
- }
516
- if (withImages) {
517
- stdout.write(`${createTabSpace()}Will test images\n\n`);
518
- }
569
+ const shouldReportMissing = checkMissing && withNested;
570
+
571
+ logInitialInfo(sitemapUrl, webUrl, withNested, withImages, checkMissing, shouldReportMissing);
519
572
 
520
573
  const startTime = performance.now();
521
574
  const sitemapUrls = await Sitemap.getSitemap(sitemapUrl);
522
575
  await testSitemapUrls(sitemapUrls, webUrl, sitemapUrl, skip, withNested, withImages);
523
576
  const finishTime = performance.now();
524
577
 
525
- const shouldBeInSitemap = getPagesShouldBeInSitemap(webUrl);
578
+ const result = getResult(webUrl, sitemapUrls, shouldReportMissing);
526
579
 
527
- const errors = TESTED_URLS.filter((r) => r.status !== 200 && r.skipped === false);
528
- const duplicates = [...new Set(sitemapUrls.filter((item, index, self) => self.indexOf(item) !== index))];
529
- const skippedUrls = TESTED_URLS.filter((r) => r.status !== 200 && r.skipped === true);
530
- const ok = TESTED_URLS.filter((r) => r.status === 200);
531
- const missingInSitemap = shouldBeInSitemap.filter((testedUrl) => !sitemapUrls.includes(testedUrl));
532
-
533
- if (missingInSitemap.length > 0 || duplicates.length > 0 || errors.length > 0 || ERRORS.length > 0) {
534
- const duplicatesText = duplicates.map((url) => `${createTabSpace()}${url}`).join("\n");
535
- const missingText = missingInSitemap.map((url) => `${createTabSpace()}${url}`).join("\n");
536
- const errorText = createErrorResult(errors);
537
- if (duplicatesText) {
538
- logErrors(duplicatesText, "\n\n\nDuplicated pages in sitemap:\n");
539
- }
540
- if (missingText) {
541
- logErrors(missingText, "\n\n\nMissing pages in sitemap:\n");
542
- }
543
- if (errorText) {
544
- logErrors(errorText, "\n\n\nErrors:\n");
545
- }
546
- await sendGoogleChatMessage(duplicatesText + missingText + errorText, googleWebhookUrl);
580
+ if (result.errorsSum > 0) {
581
+ const chatMessage = logResultErrors(webUrl, result);
582
+
583
+ await sendGoogleChatMessage(chatMessage, googleWebhookUrl);
547
584
  }
548
585
 
549
- if (skippedUrls.length > 0) {
550
- const skippedUrlsText = createSkippedResult(skippedUrls);
586
+ if (result.skippedUrls.length > 0) {
587
+ const skippedUrlsText = createSkippedResult(result.skippedUrls);
551
588
  logErrors(skippedUrlsText, "\nSkipped origins:\n");
552
589
  }
553
590
 
554
- logStatistics(ok, Math.ceil(finishTime - startTime));
591
+ logStatistics(result.ok, Math.ceil(finishTime - startTime));
555
592
 
556
- process.exit(errors.length > 0 ? 1 : 0);
593
+ process.exit(result.errorsSum > 0 ? 1 : 0);
557
594
  };