@uxf/scripts 11.63.0 → 11.64.1

This diff compares the contents of two publicly available versions of the package, as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@uxf/scripts",
3
- "version": "11.63.0",
3
+ "version": "11.64.1",
4
4
  "description": "",
5
5
  "main": "index.js",
6
6
  "bin": {
@@ -33,6 +33,7 @@
33
33
  "cheerio": "1.0.0",
34
34
  "dayjs": "1.11.13",
35
35
  "fast-glob": "3.3.2",
36
+ "got": "14.4.7",
36
37
  "madge": "8.0.0",
37
38
  "robots-txt-parser": "2.0.3",
38
39
  "yargs": "17.7.2"
@@ -5,6 +5,8 @@ const cheerio = require("cheerio");
5
5
  const GoogleChat = require("../GoogleChat");
6
6
  const robotsTxtParser = require("robots-txt-parser");
7
7
 
8
+ const got = (url, init) => import("got").then((mod) => mod.default(url, init));
9
+
8
10
  const { HTTP_USERNAME, HTTP_PASSWORD } = process.env;
9
11
 
10
12
  /**
@@ -17,7 +19,7 @@ const DUPLICATES_TITLE = "\n\n\nDuplicated pages in sitemap:\n";
17
19
  const MISSING_TITLE = "\n\n\nMissing pages in sitemap:\n";
18
20
  const ERROR_TITLE = "\n\n\nErrors:\n";
19
21
 
20
- const MAX_TTL = 3;
22
+ const MAX_TTL = 1;
21
23
  const IMAGES_LABEL = "🏞 Images:";
22
24
  const URLS_LABEL = "🔗 Links:";
23
25
 
@@ -50,17 +52,42 @@ function getUrlOrigin(url) {
50
52
  /**
51
53
  * @param url {string}
52
54
  * @param options {{redirect: boolean, isExternal: boolean}}
53
- * @returns {Promise<Response>}
55
+ * @returns {Promise<import('got').Response<string>>}
54
56
  */
55
57
  function fetcher(url, options) {
56
- return fetch(url, {
57
- credentials: "include",
58
- headers:
59
- !options.isExternal && HTTP_USERNAME && HTTP_PASSWORD
60
- ? new Headers({ Authorization: "Basic " + btoa(`${HTTP_USERNAME}:${HTTP_PASSWORD}`) })
61
- : undefined,
62
- redirect: options.redirect ? "follow" : "manual",
63
- signal: AbortSignal.timeout(20_000),
58
+ const shouldUseBasicAuth = !options.isExternal && HTTP_USERNAME && HTTP_PASSWORD;
59
+
60
+ const headers = new Headers({
61
+ "User-Agent":
62
+ "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/111.0.0.0 Safari/537.36",
63
+ Accept: "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8",
64
+ "Accept-Encoding": "gzip, deflate, br",
65
+ "Accept-Language": "en-US,en;q=0.9,cs-CZ;q=0.8,cs;q=0.7,de;q=0.6",
66
+ "Cache-Control": "no-cache",
67
+ Connection: "keep-alive",
68
+ Pragma: "no-cache",
69
+ "Sec-Ch-Ua": '"Google Chrome";v="111", "Not(A:Brand";v="8", "Chromium";v="111"',
70
+ "Sec-Ch-Ua-Arch": '"x86"',
71
+ "Sec-Ch-Ua-Mobile": "?0",
72
+ "Sec-Ch-Ua-Platform": '"Windows"',
73
+ "Sec-Fetch-Dest": "document",
74
+ "Sec-Fetch-Mode": "navigate",
75
+ "Sec-Fetch-Site": "cross-site",
76
+ "Sec-Fetch-User": "?1",
77
+ "Sec-Fetch-User-Agent": "?1",
78
+ });
79
+
80
+ return got(url, {
81
+ throwHttpErrors: false,
82
+ decompress: false,
83
+ https: {
84
+ rejectUnauthorized: false,
85
+ },
86
+ headers,
87
+ username: shouldUseBasicAuth ? HTTP_USERNAME : undefined,
88
+ password: shouldUseBasicAuth ? HTTP_PASSWORD : undefined,
89
+ followRedirect: options.redirect,
90
+ signal: AbortSignal.timeout(30_000),
64
91
  });
65
92
  }
66
93
 
@@ -226,7 +253,7 @@ async function fetchUrl(url, webUrl, parentUrl = undefined, ttl = 1) {
226
253
  }
227
254
  } catch (e) {
228
255
  const errorStatus = await fetcher(new URL(url).origin + "/robots.txt", { isExternal: true, redirect: true })
229
- .then((res) => (res.status === 200 ? -1 : res.status))
256
+ .then((res) => (res.statusCode === 200 ? -1 : res.statusCode))
230
257
  .catch((e) => e.response?.status);
231
258
 
232
259
  return {
@@ -237,7 +264,7 @@ async function fetchUrl(url, webUrl, parentUrl = undefined, ttl = 1) {
237
264
  ttl,
238
265
  status: errorStatus,
239
266
  message: e.message,
240
- skipped: errorStatus !== undefined,
267
+ skipped: true,
241
268
  html: null,
242
269
  redirected: false,
243
270
  };
@@ -246,7 +273,22 @@ async function fetchUrl(url, webUrl, parentUrl = undefined, ttl = 1) {
246
273
  try {
247
274
  const response = await fetcher(url, { redirect: !!parentUrl, isExternal: !url.includes(webUrl) });
248
275
 
249
- if (response.status !== 200 && ttl < MAX_TTL) {
276
+ if (response.statusCode === 403 && response.headers["server"] === "cloudflare") {
277
+ return {
278
+ url,
279
+ parentUrl,
280
+ isImg: isImageUrl(url),
281
+ isWebPage: true,
282
+ ttl,
283
+ status: 0,
284
+ message: "blocked by server",
285
+ skipped: true,
286
+ html: null,
287
+ redirected: false,
288
+ };
289
+ }
290
+
291
+ if (response.statusCode !== 200 && ttl < MAX_TTL) {
250
292
  return await fetchUrl(url, webUrl, parentUrl, ttl + 1);
251
293
  }
252
294
 
@@ -254,12 +296,12 @@ async function fetchUrl(url, webUrl, parentUrl = undefined, ttl = 1) {
254
296
  url,
255
297
  parentUrl,
256
298
  isImg: isImageUrl(url),
257
- isWebPage: response.headers.get("content-type")?.includes("text/html") ?? true,
299
+ isWebPage: response.headers["content-type"]?.includes("text/html") ?? true,
258
300
  ttl,
259
- status: response.status,
301
+ status: response.statusCode,
260
302
  skipped: false,
261
- html: await response.text(),
262
- redirected: response.redirected,
303
+ html: response.body,
304
+ redirected: response.redirectUrls.length > 0,
263
305
  };
264
306
  } catch (e) {
265
307
  const status = Number.parseInt((e && e.response && e.response.status) || -1, 10);
@@ -301,7 +343,7 @@ async function testUrl(url, webUrl, parentUrl = undefined) {
301
343
  redirected: result.redirected,
302
344
  indexable:
303
345
  result.isWebPage && typeof result.html === "string"
304
- ? cheerio.load(result.html)("meta[name='robots']").attr("content") !== "noindex"
346
+ ? !cheerio.load(result.html)("meta[name='robots']").attr("content")?.includes("noindex")
305
347
  : false,
306
348
  canonicalUrl:
307
349
  result.isWebPage && typeof result.html === "string"
@@ -590,25 +632,33 @@ module.exports = async function run(sitemapUrl, skip, withNested, withImages, ch
590
632
 
591
633
  logInitialInfo(sitemapUrl, webUrl, withNested, withImages, checkMissing, shouldReportMissing);
592
634
 
593
- const startTime = performance.now();
594
- const sitemapUrls = await Sitemap.getSitemap(sitemapUrl);
595
- await testSitemapUrls(sitemapUrls, webUrl, sitemapUrl, skip, withNested, withImages);
596
- const finishTime = performance.now();
635
+ try {
636
+ const startTime = performance.now();
637
+ const sitemapUrls = await Sitemap.getSitemap(sitemapUrl);
638
+ await testSitemapUrls(sitemapUrls, webUrl, sitemapUrl, skip, withNested, withImages);
639
+ const finishTime = performance.now();
597
640
 
598
- const result = getResult(webUrl, sitemapUrls, shouldReportMissing);
641
+ const result = getResult(webUrl, sitemapUrls, shouldReportMissing);
599
642
 
600
- if (result.errorsSum > 0) {
601
- const chatMessage = logResultErrors(webUrl, result);
643
+ if (result.errorsSum > 0) {
644
+ const chatMessage = logResultErrors(webUrl, result);
602
645
 
603
- await sendGoogleChatMessage(chatMessage, googleWebhookUrl);
604
- }
646
+ await sendGoogleChatMessage(chatMessage, googleWebhookUrl);
647
+ }
605
648
 
606
- if (result.skippedUrls.length > 0) {
607
- const skippedUrlsText = createSkippedResult(result.skippedUrls);
608
- logErrors(skippedUrlsText, "\nSkipped origins:\n");
609
- }
649
+ if (result.skippedUrls.length > 0) {
650
+ const skippedUrlsText = createSkippedResult(result.skippedUrls);
651
+ logErrors(skippedUrlsText, "\nSkipped origins:\n");
652
+ }
610
653
 
611
- logStatistics(result.ok, Math.ceil(finishTime - startTime));
654
+ logStatistics(result.ok, Math.ceil(finishTime - startTime));
612
655
 
613
- process.exit(result.errorsSum > 0 ? 1 : 0);
656
+ process.exit(result.errorsSum > 0 ? 1 : 0);
657
+ } catch (e) {
658
+ stdout.write("⛔ Error: " + e.message + "\n");
659
+
660
+ await sendGoogleChatMessage(`Sitemap check failed completely:\n\n${e.message}`, googleWebhookUrl);
661
+
662
+ process.exit(1);
663
+ }
614
664
  };