@uxf/scripts 11.62.1 → 11.62.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@uxf/scripts",
3
- "version": "11.62.1",
3
+ "version": "11.62.3",
4
4
  "description": "",
5
5
  "main": "index.js",
6
6
  "bin": {
@@ -43,6 +43,11 @@ Environment variables:
43
43
  type: "boolean",
44
44
  group: "Options",
45
45
  })
46
+ .option("check-missing", {
47
+ describe: "If indexable pages missing in sitemap should be reported. (--with-nested must be used)",
48
+ type: "boolean",
49
+ group: "Options",
50
+ })
46
51
  .option("google-chat-webhook-url", {
47
52
  describe: "Webhook URL of Google Chat where to send the results.",
48
53
  type: "string",
@@ -70,6 +75,7 @@ Environment variables:
70
75
  skip,
71
76
  options["with-nested"],
72
77
  options["with-images"],
78
+ options["check-missing"],
73
79
  options["google-chat-webhook-url"],
74
80
  );
75
81
  } catch (e) {
@@ -9,15 +9,13 @@ const { HTTP_USERNAME, HTTP_PASSWORD } = process.env;
9
9
 
10
10
  /**
11
11
  * @typedef {{parentUrl: (string | undefined), isImg: boolean, isWebPage: boolean, ttl: number, url: string, status: number, message: (string | undefined), skipped: boolean, html: (string | null), redirected: boolean}} UrlCheckResponse
12
- */
13
-
14
- /**
15
12
  * @typedef {{url: string, parentUrl: (string | undefined), canonicalUrl: (string | null), isImg: boolean, isWebPage: boolean, ttl: number, status: number, message: (string | undefined), skipped: boolean, indexable: boolean, redirected: boolean}} TestedUrlDto
13
+ * @typedef {{errors: TestedUrlDto[], duplicates: unknown[], missingInSitemap: (*|*[]), ok: TestedUrlDto[], skippedUrls: TestedUrlDto[], errorsSum: number}} ResultDto
16
14
  */
17
15
 
18
16
  const DUPLICATES_TITLE = "\n\n\nDuplicated pages in sitemap:\n";
19
17
  const MISSING_TITLE = "\n\n\nMissing pages in sitemap:\n";
20
- const ERROR_TITLE = "\n\n\nErrors:\n"
18
+ const ERROR_TITLE = "\n\n\nErrors:\n";
21
19
 
22
20
  const MAX_TTL = 3;
23
21
  const IMAGES_LABEL = "🏞 Images:";
@@ -28,10 +26,27 @@ const URLS_LABEL = "🔗 Links:";
28
26
  */
29
27
  const TESTED_URLS = [];
30
28
  const URLS_TO_CHECK = new Set();
31
- const ERRORS = [];
32
29
 
33
30
  const robotsParser = robotsTxtParser({ userAgent: "uxf-bot", allowOnNeutral: false });
34
31
 
32
+ const HOSTNAME_ROBOTS_MAP = {
33
+ "fb.me": "facebook.com",
34
+ };
35
+
36
+ /**
37
+ * @param url {string}
38
+ * @returns {string}
39
+ */
40
+ function getUrlOrigin(url) {
41
+ const urlObject = new URL(url);
42
+
43
+ if (urlObject.hostname in HOSTNAME_ROBOTS_MAP) {
44
+ return new URL(url.replace(urlObject.hostname, HOSTNAME_ROBOTS_MAP[urlObject.hostname])).origin;
45
+ }
46
+
47
+ return urlObject.origin;
48
+ }
49
+
35
50
  /**
36
51
  * @param url {string}
37
52
  * @param options {{redirect: boolean, isExternal: boolean}}
@@ -116,13 +131,6 @@ function createErrorResult(errors) {
116
131
  }
117
132
  }
118
133
 
119
- if (ERRORS.length > 0) {
120
- generalErrors = `\n\nGeneral errors:\n`;
121
- for (const error of ERRORS) {
122
- generalErrors += `${createTabSpace(1)}${error}\n`;
123
- }
124
- }
125
-
126
134
  return parentPages + nestedPages + generalErrors;
127
135
  }
128
136
 
@@ -194,7 +202,7 @@ async function fetchUrl(url, webUrl, parentUrl = undefined, ttl = 1) {
194
202
  }
195
203
 
196
204
  try {
197
- const origin = new URL(url).origin;
205
+ const origin = getUrlOrigin(url);
198
206
 
199
207
  if (parentUrl && origin !== webUrl) {
200
208
  await robotsParser.useRobotsFor(origin);
@@ -344,17 +352,13 @@ async function testSitemapUrls(urls, webUrl, sitemapUrl, skip, withNested, withI
344
352
  * @return {Promise<void>}
345
353
  */
346
354
  async function testNestedUrls(html, parentUrl, parentIndex, webUrl) {
347
- try {
348
- const $ = cheerio.load(html.toString());
349
- let urls = createCorrectLinks(
350
- $("a[href]").map((i, node) => $(node).attr("href")),
351
- webUrl,
352
- );
355
+ const $ = cheerio.load(html.toString());
356
+ let urls = createCorrectLinks(
357
+ $("a[href]").map((i, node) => $(node).attr("href")),
358
+ webUrl,
359
+ );
353
360
 
354
- await testNested(urls, parentIndex, parentUrl, createTabSpace() + URLS_LABEL, webUrl);
355
- } catch (e) {
356
- ERRORS.push(`Can't test all nested pages for ${parentUrl} - ${e.message}`);
357
- }
361
+ await testNested(urls, parentIndex, parentUrl, createTabSpace() + URLS_LABEL, webUrl);
358
362
  }
359
363
 
360
364
  /**
@@ -365,17 +369,13 @@ async function testNestedUrls(html, parentUrl, parentIndex, webUrl) {
365
369
  * @return {Promise<void>}
366
370
  */
367
371
  async function testNestedImages(html, parentUrl, parentIndex, webUrl) {
368
- try {
369
- const $ = cheerio.load(html);
370
- const images = createCorrectLinks(
371
- $("img[src]").map((i, node) => $(node).attr("src")),
372
- webUrl,
373
- );
372
+ const $ = cheerio.load(html);
373
+ const images = createCorrectLinks(
374
+ $("img[src]").map((i, node) => $(node).attr("src")),
375
+ webUrl,
376
+ );
374
377
 
375
- await testNested(images, parentIndex, parentUrl, createTabSpace() + IMAGES_LABEL, webUrl);
376
- } catch (e) {
377
- ERRORS.push(`Can't test all nested images for ${parentUrl} - ${e.message}`);
378
- }
378
+ await testNested(images, parentIndex, parentUrl, createTabSpace() + IMAGES_LABEL, webUrl);
379
379
  }
380
380
 
381
381
  /**
@@ -436,7 +436,7 @@ function convertTime(millis) {
436
436
  }
437
437
 
438
438
  /**
439
- * @param okResults {UrlCheckResponse[]}
439
+ * @param okResults {TestedUrlDto[]}
440
440
  * @param time {number}
441
441
  */
442
442
  function logStatistics(okResults, time) {
@@ -484,6 +484,7 @@ function getPagesShouldBeInSitemap(webUrl) {
484
484
  (dto) =>
485
485
  (dto.canonicalUrl ?? dto.url).startsWith(webUrl) &&
486
486
  !(dto.canonicalUrl ?? dto.url).includes("?") &&
487
+ !(dto.canonicalUrl ?? dto.url).includes("#") &&
487
488
  dto.status === 200 &&
488
489
  dto.ttl <= 1 &&
489
490
  !dto.redirected &&
@@ -492,15 +493,91 @@ function getPagesShouldBeInSitemap(webUrl) {
492
493
  .map((url) => (url.canonicalUrl ?? url.url).toLowerCase());
493
494
  }
494
495
 
496
+ /**
497
+ * @param sitemapUrl {string}
498
+ * @param webUrl {string}
499
+ * @param withNested {boolean}
500
+ * @param withImages {boolean}
501
+ * @param checkMissing {boolean}
502
+ * @param shouldReportMissing {boolean}
503
+ * @return {void}
504
+ */
505
+ function logInitialInfo(sitemapUrl, webUrl, withNested, withImages, checkMissing, shouldReportMissing) {
506
+ stdout.write(`${createTabSpace()}Sitemap url: ${sitemapUrl}\n`);
507
+ stdout.write(`${createTabSpace()}Web url: ${webUrl}\n\n`);
508
+ if (withNested) {
509
+ stdout.write(`${createTabSpace()}Will test nested links\n`);
510
+ }
511
+ if (withImages) {
512
+ stdout.write(`${createTabSpace()}Will test images\n\n`);
513
+ }
514
+ if (checkMissing) {
515
+ if (!shouldReportMissing) {
516
+ stdout.write(`${createTabSpace()}--check-missing option is only available with --with-nested option!\n`);
517
+ process.exit(1);
518
+ }
519
+ stdout.write(`${createTabSpace()}Will look for pages missing in sitemap\n\n`);
520
+ }
521
+ }
522
+
523
+ /**
524
+ * @param webUrl {string}
525
+ * @param result {ResultDto}
526
+ * @returns {string}
527
+ */
528
+ function logResultErrors(webUrl, result) {
529
+ let chatMessage = `Result for ${webUrl}:\n\n`;
530
+ const duplicatesText = result.duplicates.map((url) => `${createTabSpace()}${url}`).join("\n");
531
+ const missingText = result.missingInSitemap.map((url) => `${createTabSpace()}${url}`).join("\n");
532
+ const errorText = createErrorResult(result.errors);
533
+
534
+ if (duplicatesText) {
535
+ logErrors(duplicatesText, DUPLICATES_TITLE);
536
+ chatMessage += DUPLICATES_TITLE + duplicatesText;
537
+ }
538
+ if (missingText) {
539
+ logErrors(missingText, MISSING_TITLE);
540
+ chatMessage += MISSING_TITLE + missingText;
541
+ }
542
+ if (errorText) {
543
+ logErrors(errorText, ERROR_TITLE);
544
+ chatMessage += ERROR_TITLE + errorText;
545
+ }
546
+
547
+ return chatMessage;
548
+ }
549
+
550
+ /**
551
+ * @param webUrl {string}
552
+ * @param sitemapUrls {string[]}
553
+ * @param shouldReportMissing {boolean}
554
+ * @returns {ResultDto}
555
+ */
556
+ function getResult(webUrl, sitemapUrls, shouldReportMissing) {
557
+ const shouldBeInSitemap = getPagesShouldBeInSitemap(webUrl);
558
+ const errors = TESTED_URLS.filter((r) => r.status !== 200 && r.skipped === false);
559
+ const duplicates = [...new Set(sitemapUrls.filter((item, index, self) => self.indexOf(item) !== index))];
560
+ const missingInSitemap = shouldReportMissing
561
+ ? shouldBeInSitemap.filter((testedUrl) => !sitemapUrls.includes(testedUrl))
562
+ : [];
563
+ const ok = TESTED_URLS.filter((r) => r.status === 200);
564
+ const skippedUrls = TESTED_URLS.filter((r) => r.status !== 200 && r.skipped === true);
565
+
566
+ const errorsSum = missingInSitemap.length + duplicates.length + errors.length;
567
+
568
+ return { errors, duplicates, missingInSitemap, ok, skippedUrls, errorsSum };
569
+ }
570
+
495
571
  /**
496
572
  * @param sitemapUrl {string}
497
573
  * @param skip {number}
498
574
  * @param withNested {boolean}
499
575
  * @param withImages {boolean}
576
+ * @param checkMissing {boolean}
500
577
  * @param googleWebhookUrl {string|undefined}
501
578
  * @return {Promise<*>}
502
579
  */
503
- module.exports = async function run(sitemapUrl, skip, withNested, withImages, googleWebhookUrl) {
580
+ module.exports = async function run(sitemapUrl, skip, withNested, withImages, checkMissing, googleWebhookUrl) {
504
581
  if (!sitemapUrl) {
505
582
  stdout.write("⛔ Required parameter --url is empty.\n");
506
583
  return process.exit(1);
@@ -509,56 +586,29 @@ module.exports = async function run(sitemapUrl, skip, withNested, withImages, go
509
586
  const url = new URL(sitemapUrl);
510
587
  const webUrl = url.origin;
511
588
 
512
- stdout.write(`${createTabSpace()}Sitemap url: ${sitemapUrl}\n`);
513
- stdout.write(`${createTabSpace()}Web url: ${webUrl}\n\n`);
514
- if (withNested) {
515
- stdout.write(`${createTabSpace()}Will test nested links\n`);
516
- }
517
- if (withImages) {
518
- stdout.write(`${createTabSpace()}Will test images\n\n`);
519
- }
589
+ const shouldReportMissing = checkMissing && withNested;
590
+
591
+ logInitialInfo(sitemapUrl, webUrl, withNested, withImages, checkMissing, shouldReportMissing);
520
592
 
521
593
  const startTime = performance.now();
522
594
  const sitemapUrls = await Sitemap.getSitemap(sitemapUrl);
523
595
  await testSitemapUrls(sitemapUrls, webUrl, sitemapUrl, skip, withNested, withImages);
524
596
  const finishTime = performance.now();
525
597
 
526
- const shouldBeInSitemap = getPagesShouldBeInSitemap(webUrl);
527
-
528
- const errors = TESTED_URLS.filter((r) => r.status !== 200 && r.skipped === false);
529
- const duplicates = [...new Set(sitemapUrls.filter((item, index, self) => self.indexOf(item) !== index))];
530
- const skippedUrls = TESTED_URLS.filter((r) => r.status !== 200 && r.skipped === true);
531
- const ok = TESTED_URLS.filter((r) => r.status === 200);
532
- const missingInSitemap = shouldBeInSitemap.filter((testedUrl) => !sitemapUrls.includes(testedUrl));
533
-
534
- if (missingInSitemap.length > 0 || duplicates.length > 0 || errors.length > 0 || ERRORS.length > 0) {
535
- let chatMessage = "";
536
- const duplicatesText = duplicates.map((url) => `${createTabSpace()}${url}`).join("\n");
537
- const missingText = missingInSitemap.map((url) => `${createTabSpace()}${url}`).join("\n");
538
- const errorText = createErrorResult(errors);
598
+ const result = getResult(webUrl, sitemapUrls, shouldReportMissing);
539
599
 
540
- if (duplicatesText) {
541
- logErrors(duplicatesText, DUPLICATES_TITLE);
542
- chatMessage += DUPLICATES_TITLE + duplicatesText;
543
- }
544
- if (missingText) {
545
- logErrors(missingText, MISSING_TITLE);
546
- chatMessage += MISSING_TITLE + missingText;
547
- }
548
- if (errorText) {
549
- logErrors(errorText, ERROR_TITLE);
550
- chatMessage += ERROR_TITLE + errorText;
551
- }
600
+ if (result.errorsSum > 0) {
601
+ const chatMessage = logResultErrors(webUrl, result);
552
602
 
553
603
  await sendGoogleChatMessage(chatMessage, googleWebhookUrl);
554
604
  }
555
605
 
556
- if (skippedUrls.length > 0) {
557
- const skippedUrlsText = createSkippedResult(skippedUrls);
606
+ if (result.skippedUrls.length > 0) {
607
+ const skippedUrlsText = createSkippedResult(result.skippedUrls);
558
608
  logErrors(skippedUrlsText, "\nSkipped origins:\n");
559
609
  }
560
610
 
561
- logStatistics(ok, Math.ceil(finishTime - startTime));
611
+ logStatistics(result.ok, Math.ceil(finishTime - startTime));
562
612
 
563
- process.exit(errors.length > 0 ? 1 : 0);
613
+ process.exit(result.errorsSum > 0 ? 1 : 0);
564
614
  };