@uxf/scripts 11.62.1 → 11.62.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@uxf/scripts",
3
- "version": "11.62.1",
3
+ "version": "11.62.2",
4
4
  "description": "",
5
5
  "main": "index.js",
6
6
  "bin": {
@@ -43,6 +43,11 @@ Environment variables:
43
43
  type: "boolean",
44
44
  group: "Options",
45
45
  })
46
+ .option("check-missing", {
47
+ describe: "If indexable pages missing in sitemap should be reported. (--with-nested must be used)",
48
+ type: "boolean",
49
+ group: "Options",
50
+ })
46
51
  .option("google-chat-webhook-url", {
47
52
  describe: "Webhook URL of Google Chat where to send the results.",
48
53
  type: "string",
@@ -70,6 +75,7 @@ Environment variables:
70
75
  skip,
71
76
  options["with-nested"],
72
77
  options["with-images"],
78
+ options["check-missing"],
73
79
  options["google-chat-webhook-url"],
74
80
  );
75
81
  } catch (e) {
@@ -9,10 +9,8 @@ const { HTTP_USERNAME, HTTP_PASSWORD } = process.env;
9
9
 
10
10
  /**
11
11
  * @typedef {{parentUrl: (string | undefined), isImg: boolean, isWebPage: boolean, ttl: number, url: string, status: number, message: (string | undefined), skipped: boolean, html: (string | null), redirected: boolean}} UrlCheckResponse
12
- */
13
-
14
- /**
15
12
  * @typedef {{url: string, parentUrl: (string | undefined), canonicalUrl: (string | null), isImg: boolean, isWebPage: boolean, ttl: number, status: number, message: (string | undefined), skipped: boolean, indexable: boolean, redirected: boolean}} TestedUrlDto
13
+ * @typedef {{errors: TestedUrlDto[], duplicates: unknown[], missingInSitemap: (*|*[]), ok: TestedUrlDto[], skippedUrls: TestedUrlDto[], errorsSum: number}} ResultDto
16
14
  */
17
15
 
18
16
  const DUPLICATES_TITLE = "\n\n\nDuplicated pages in sitemap:\n";
@@ -28,7 +26,6 @@ const URLS_LABEL = "🔗 Links:";
28
26
  */
29
27
  const TESTED_URLS = [];
30
28
  const URLS_TO_CHECK = new Set();
31
- const ERRORS = [];
32
29
 
33
30
  const robotsParser = robotsTxtParser({ userAgent: "uxf-bot", allowOnNeutral: false });
34
31
 
@@ -116,13 +113,6 @@ function createErrorResult(errors) {
116
113
  }
117
114
  }
118
115
 
119
- if (ERRORS.length > 0) {
120
- generalErrors = `\n\nGeneral errors:\n`;
121
- for (const error of ERRORS) {
122
- generalErrors += `${createTabSpace(1)}${error}\n`;
123
- }
124
- }
125
-
126
116
  return parentPages + nestedPages + generalErrors;
127
117
  }
128
118
 
@@ -344,17 +334,13 @@ async function testSitemapUrls(urls, webUrl, sitemapUrl, skip, withNested, withI
344
334
  * @return {Promise<void>}
345
335
  */
346
336
  async function testNestedUrls(html, parentUrl, parentIndex, webUrl) {
347
- try {
348
- const $ = cheerio.load(html.toString());
349
- let urls = createCorrectLinks(
350
- $("a[href]").map((i, node) => $(node).attr("href")),
351
- webUrl,
352
- );
337
+ const $ = cheerio.load(html.toString());
338
+ let urls = createCorrectLinks(
339
+ $("a[href]").map((i, node) => $(node).attr("href")),
340
+ webUrl,
341
+ );
353
342
 
354
- await testNested(urls, parentIndex, parentUrl, createTabSpace() + URLS_LABEL, webUrl);
355
- } catch (e) {
356
- ERRORS.push(`Can't test all nested pages for ${parentUrl} - ${e.message}`);
357
- }
343
+ await testNested(urls, parentIndex, parentUrl, createTabSpace() + URLS_LABEL, webUrl);
358
344
  }
359
345
 
360
346
  /**
@@ -365,17 +351,13 @@ async function testNestedUrls(html, parentUrl, parentIndex, webUrl) {
365
351
  * @return {Promise<void>}
366
352
  */
367
353
  async function testNestedImages(html, parentUrl, parentIndex, webUrl) {
368
- try {
369
- const $ = cheerio.load(html);
370
- const images = createCorrectLinks(
371
- $("img[src]").map((i, node) => $(node).attr("src")),
372
- webUrl,
373
- );
354
+ const $ = cheerio.load(html);
355
+ const images = createCorrectLinks(
356
+ $("img[src]").map((i, node) => $(node).attr("src")),
357
+ webUrl,
358
+ );
374
359
 
375
- await testNested(images, parentIndex, parentUrl, createTabSpace() + IMAGES_LABEL, webUrl);
376
- } catch (e) {
377
- ERRORS.push(`Can't test all nested images for ${parentUrl} - ${e.message}`);
378
- }
360
+ await testNested(images, parentIndex, parentUrl, createTabSpace() + IMAGES_LABEL, webUrl);
379
361
  }
380
362
 
381
363
  /**
@@ -436,7 +418,7 @@ function convertTime(millis) {
436
418
  }
437
419
 
438
420
  /**
439
- * @param okResults {UrlCheckResponse[]}
421
+ * @param okResults {TestedUrlDto[]}
440
422
  * @param time {number}
441
423
  */
442
424
  function logStatistics(okResults, time) {
@@ -484,6 +466,7 @@ function getPagesShouldBeInSitemap(webUrl) {
484
466
  (dto) =>
485
467
  (dto.canonicalUrl ?? dto.url).startsWith(webUrl) &&
486
468
  !(dto.canonicalUrl ?? dto.url).includes("?") &&
469
+ !(dto.canonicalUrl ?? dto.url).includes("#") &&
487
470
  dto.status === 200 &&
488
471
  dto.ttl <= 1 &&
489
472
  !dto.redirected &&
@@ -492,15 +475,89 @@ function getPagesShouldBeInSitemap(webUrl) {
492
475
  .map((url) => (url.canonicalUrl ?? url.url).toLowerCase());
493
476
  }
494
477
 
478
+ /**
479
+ * @param sitemapUrl {string}
480
+ * @param webUrl {string}
481
+ * @param withNested {boolean}
482
+ * @param withImages {boolean}
483
+ * @param checkMissing {boolean}
484
+ * @param shouldReportMissing {boolean}
485
+ * @return {void}
486
+ */
487
+ function logInitialInfo(sitemapUrl, webUrl, withNested, withImages, checkMissing, shouldReportMissing) {
488
+ stdout.write(`${createTabSpace()}Sitemap url: ${sitemapUrl}\n`);
489
+ stdout.write(`${createTabSpace()}Web url: ${webUrl}\n\n`);
490
+ if (withNested) {
491
+ stdout.write(`${createTabSpace()}Will test nested links\n`);
492
+ }
493
+ if (withImages) {
494
+ stdout.write(`${createTabSpace()}Will test images\n\n`);
495
+ }
496
+ if (checkMissing) {
497
+ if (!shouldReportMissing) {
498
+ stdout.write(`${createTabSpace()}--check-missing option is only available with --with-nested option!\n`);
499
+ process.exit(1);
500
+ }
501
+ stdout.write(`${createTabSpace()}Will look for pages missing in sitemap\n\n`);
502
+ }
503
+ }
504
+
505
+ /**
506
+ * @param webUrl {string}
507
+ * @param result {ResultDto}
508
+ * @returns {string}
509
+ */
510
+ function logResultErrors(webUrl, result) {
511
+ let chatMessage = `Result for ${webUrl}:\n\n`;
512
+ const duplicatesText = result.duplicates.map((url) => `${createTabSpace()}${url}`).join("\n");
513
+ const missingText = result.missingInSitemap.map((url) => `${createTabSpace()}${url}`).join("\n");
514
+ const errorText = createErrorResult(result.errors);
515
+
516
+ if (duplicatesText) {
517
+ logErrors(duplicatesText, DUPLICATES_TITLE);
518
+ chatMessage += DUPLICATES_TITLE + duplicatesText;
519
+ }
520
+ if (missingText) {
521
+ logErrors(missingText, MISSING_TITLE);
522
+ chatMessage += MISSING_TITLE + missingText;
523
+ }
524
+ if (errorText) {
525
+ logErrors(errorText, ERROR_TITLE);
526
+ chatMessage += ERROR_TITLE + errorText;
527
+ }
528
+
529
+ return chatMessage;
530
+ }
531
+
532
+ /**
533
+ * @param webUrl {string}
534
+ * @param sitemapUrls {string[]}
535
+ * @param shouldReportMissing {boolean}
536
+ * @returns {ResultDto}
537
+ */
538
+ function getResult(webUrl, sitemapUrls, shouldReportMissing) {
539
+ const shouldBeInSitemap = getPagesShouldBeInSitemap(webUrl);
540
+ const errors = TESTED_URLS.filter((r) => r.status !== 200 && r.skipped === false);
541
+ const duplicates = [...new Set(sitemapUrls.filter((item, index, self) => self.indexOf(item) !== index))];
542
+ const missingInSitemap = shouldReportMissing ? shouldBeInSitemap.filter((testedUrl) => !sitemapUrls.includes(testedUrl)) : [];
543
+ const ok = TESTED_URLS.filter((r) => r.status === 200);
544
+ const skippedUrls = TESTED_URLS.filter((r) => r.status !== 200 && r.skipped === true);
545
+
546
+ const errorsSum = missingInSitemap.length + duplicates.length + errors.length;
547
+
548
+ return {errors, duplicates, missingInSitemap, ok, skippedUrls, errorsSum};
549
+ }
550
+
495
551
  /**
496
552
  * @param sitemapUrl {string}
497
553
  * @param skip {number}
498
554
  * @param withNested {boolean}
499
555
  * @param withImages {boolean}
556
+ * @param checkMissing {boolean}
500
557
  * @param googleWebhookUrl {string|undefined}
501
558
  * @return {Promise<*>}
502
559
  */
503
- module.exports = async function run(sitemapUrl, skip, withNested, withImages, googleWebhookUrl) {
560
+ module.exports = async function run(sitemapUrl, skip, withNested, withImages, checkMissing, googleWebhookUrl) {
504
561
  if (!sitemapUrl) {
505
562
  stdout.write("⛔ Required parameter --url is empty.\n");
506
563
  return process.exit(1);
@@ -509,56 +566,29 @@ module.exports = async function run(sitemapUrl, skip, withNested, withImages, go
509
566
  const url = new URL(sitemapUrl);
510
567
  const webUrl = url.origin;
511
568
 
512
- stdout.write(`${createTabSpace()}Sitemap url: ${sitemapUrl}\n`);
513
- stdout.write(`${createTabSpace()}Web url: ${webUrl}\n\n`);
514
- if (withNested) {
515
- stdout.write(`${createTabSpace()}Will test nested links\n`);
516
- }
517
- if (withImages) {
518
- stdout.write(`${createTabSpace()}Will test images\n\n`);
519
- }
569
+ const shouldReportMissing = checkMissing && withNested;
570
+
571
+ logInitialInfo(sitemapUrl, webUrl, withNested, withImages, checkMissing, shouldReportMissing);
520
572
 
521
573
  const startTime = performance.now();
522
574
  const sitemapUrls = await Sitemap.getSitemap(sitemapUrl);
523
575
  await testSitemapUrls(sitemapUrls, webUrl, sitemapUrl, skip, withNested, withImages);
524
576
  const finishTime = performance.now();
525
577
 
526
- const shouldBeInSitemap = getPagesShouldBeInSitemap(webUrl);
527
-
528
- const errors = TESTED_URLS.filter((r) => r.status !== 200 && r.skipped === false);
529
- const duplicates = [...new Set(sitemapUrls.filter((item, index, self) => self.indexOf(item) !== index))];
530
- const skippedUrls = TESTED_URLS.filter((r) => r.status !== 200 && r.skipped === true);
531
- const ok = TESTED_URLS.filter((r) => r.status === 200);
532
- const missingInSitemap = shouldBeInSitemap.filter((testedUrl) => !sitemapUrls.includes(testedUrl));
578
+ const result = getResult(webUrl, sitemapUrls, shouldReportMissing);
533
579
 
534
- if (missingInSitemap.length > 0 || duplicates.length > 0 || errors.length > 0 || ERRORS.length > 0) {
535
- let chatMessage = "";
536
- const duplicatesText = duplicates.map((url) => `${createTabSpace()}${url}`).join("\n");
537
- const missingText = missingInSitemap.map((url) => `${createTabSpace()}${url}`).join("\n");
538
- const errorText = createErrorResult(errors);
539
-
540
- if (duplicatesText) {
541
- logErrors(duplicatesText, DUPLICATES_TITLE);
542
- chatMessage += DUPLICATES_TITLE + duplicatesText;
543
- }
544
- if (missingText) {
545
- logErrors(missingText, MISSING_TITLE);
546
- chatMessage += MISSING_TITLE + missingText;
547
- }
548
- if (errorText) {
549
- logErrors(errorText, ERROR_TITLE);
550
- chatMessage += ERROR_TITLE + errorText;
551
- }
580
+ if (result.errorsSum > 0) {
581
+ const chatMessage = logResultErrors(webUrl, result);
552
582
 
553
583
  await sendGoogleChatMessage(chatMessage, googleWebhookUrl);
554
584
  }
555
585
 
556
- if (skippedUrls.length > 0) {
557
- const skippedUrlsText = createSkippedResult(skippedUrls);
586
+ if (result.skippedUrls.length > 0) {
587
+ const skippedUrlsText = createSkippedResult(result.skippedUrls);
558
588
  logErrors(skippedUrlsText, "\nSkipped origins:\n");
559
589
  }
560
590
 
561
- logStatistics(ok, Math.ceil(finishTime - startTime));
591
+ logStatistics(result.ok, Math.ceil(finishTime - startTime));
562
592
 
563
- process.exit(errors.length > 0 ? 1 : 0);
593
+ process.exit(result.errorsSum > 0 ? 1 : 0);
564
594
  };