@uxf/scripts 11.63.0 → 11.64.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +2 -1
- package/src/uxf-sitemap-check/index.js +83 -33
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@uxf/scripts",
|
|
3
|
-
"version": "11.63.0",
|
|
3
|
+
"version": "11.64.1",
|
|
4
4
|
"description": "",
|
|
5
5
|
"main": "index.js",
|
|
6
6
|
"bin": {
|
|
@@ -33,6 +33,7 @@
|
|
|
33
33
|
"cheerio": "1.0.0",
|
|
34
34
|
"dayjs": "1.11.13",
|
|
35
35
|
"fast-glob": "3.3.2",
|
|
36
|
+
"got": "14.4.7",
|
|
36
37
|
"madge": "8.0.0",
|
|
37
38
|
"robots-txt-parser": "2.0.3",
|
|
38
39
|
"yargs": "17.7.2"
|
|
package/src/uxf-sitemap-check/index.js
CHANGED
|
@@ -5,6 +5,8 @@ const cheerio = require("cheerio");
|
|
|
5
5
|
const GoogleChat = require("../GoogleChat");
|
|
6
6
|
const robotsTxtParser = require("robots-txt-parser");
|
|
7
7
|
|
|
8
|
+
const got = (url, init) => import("got").then((mod) => mod.default(url, init));
|
|
9
|
+
|
|
8
10
|
const { HTTP_USERNAME, HTTP_PASSWORD } = process.env;
|
|
9
11
|
|
|
10
12
|
/**
|
|
@@ -17,7 +19,7 @@ const DUPLICATES_TITLE = "\n\n\nDuplicated pages in sitemap:\n";
|
|
|
17
19
|
const MISSING_TITLE = "\n\n\nMissing pages in sitemap:\n";
|
|
18
20
|
const ERROR_TITLE = "\n\n\nErrors:\n";
|
|
19
21
|
|
|
20
|
-
const MAX_TTL =
|
|
22
|
+
const MAX_TTL = 1;
|
|
21
23
|
const IMAGES_LABEL = "🏞 Images:";
|
|
22
24
|
const URLS_LABEL = "🔗 Links:";
|
|
23
25
|
|
|
@@ -50,17 +52,42 @@ function getUrlOrigin(url) {
|
|
|
50
52
|
/**
|
|
51
53
|
* @param url {string}
|
|
52
54
|
* @param options {{redirect: boolean, isExternal: boolean}}
|
|
53
|
-
* @returns {Promise<Response
|
|
55
|
+
* @returns {Promise<import('got').Response<string>>}
|
|
54
56
|
*/
|
|
55
57
|
function fetcher(url, options) {
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
58
|
+
const shouldUseBasicAuth = !options.isExternal && HTTP_USERNAME && HTTP_PASSWORD;
|
|
59
|
+
|
|
60
|
+
const headers = new Headers({
|
|
61
|
+
"User-Agent":
|
|
62
|
+
"Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/111.0.0.0 Safari/537.36",
|
|
63
|
+
Accept: "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8",
|
|
64
|
+
"Accept-Encoding": "gzip, deflate, br",
|
|
65
|
+
"Accept-Language": "en-US,en;q=0.9,cs-CZ;q=0.8,cs;q=0.7,de;q=0.6",
|
|
66
|
+
"Cache-Control": "no-cache",
|
|
67
|
+
Connection: "keep-alive",
|
|
68
|
+
Pragma: "no-cache",
|
|
69
|
+
"Sec-Ch-Ua": '"Google Chrome";v="111", "Not(A:Brand";v="8", "Chromium";v="111"',
|
|
70
|
+
"Sec-Ch-Ua-Arch": '"x86"',
|
|
71
|
+
"Sec-Ch-Ua-Mobile": "?0",
|
|
72
|
+
"Sec-Ch-Ua-Platform": '"Windows"',
|
|
73
|
+
"Sec-Fetch-Dest": "document",
|
|
74
|
+
"Sec-Fetch-Mode": "navigate",
|
|
75
|
+
"Sec-Fetch-Site": "cross-site",
|
|
76
|
+
"Sec-Fetch-User": "?1",
|
|
77
|
+
"Sec-Fetch-User-Agent": "?1",
|
|
78
|
+
});
|
|
79
|
+
|
|
80
|
+
return got(url, {
|
|
81
|
+
throwHttpErrors: false,
|
|
82
|
+
decompress: false,
|
|
83
|
+
https: {
|
|
84
|
+
rejectUnauthorized: false,
|
|
85
|
+
},
|
|
86
|
+
headers,
|
|
87
|
+
username: shouldUseBasicAuth ? HTTP_USERNAME : undefined,
|
|
88
|
+
password: shouldUseBasicAuth ? HTTP_PASSWORD : undefined,
|
|
89
|
+
followRedirect: options.redirect,
|
|
90
|
+
signal: AbortSignal.timeout(30_000),
|
|
64
91
|
});
|
|
65
92
|
}
|
|
66
93
|
|
|
@@ -226,7 +253,7 @@ async function fetchUrl(url, webUrl, parentUrl = undefined, ttl = 1) {
|
|
|
226
253
|
}
|
|
227
254
|
} catch (e) {
|
|
228
255
|
const errorStatus = await fetcher(new URL(url).origin + "/robots.txt", { isExternal: true, redirect: true })
|
|
229
|
-
.then((res) => (res.
|
|
256
|
+
.then((res) => (res.statusCode === 200 ? -1 : res.statusCode))
|
|
230
257
|
.catch((e) => e.response?.status);
|
|
231
258
|
|
|
232
259
|
return {
|
|
@@ -237,7 +264,7 @@ async function fetchUrl(url, webUrl, parentUrl = undefined, ttl = 1) {
|
|
|
237
264
|
ttl,
|
|
238
265
|
status: errorStatus,
|
|
239
266
|
message: e.message,
|
|
240
|
-
skipped:
|
|
267
|
+
skipped: true,
|
|
241
268
|
html: null,
|
|
242
269
|
redirected: false,
|
|
243
270
|
};
|
|
@@ -246,7 +273,22 @@ async function fetchUrl(url, webUrl, parentUrl = undefined, ttl = 1) {
|
|
|
246
273
|
try {
|
|
247
274
|
const response = await fetcher(url, { redirect: !!parentUrl, isExternal: !url.includes(webUrl) });
|
|
248
275
|
|
|
249
|
-
if (response.
|
|
276
|
+
if (response.statusCode === 403 && response.headers["server"] === "cloudflare") {
|
|
277
|
+
return {
|
|
278
|
+
url,
|
|
279
|
+
parentUrl,
|
|
280
|
+
isImg: isImageUrl(url),
|
|
281
|
+
isWebPage: true,
|
|
282
|
+
ttl,
|
|
283
|
+
status: 0,
|
|
284
|
+
message: "blocked by server",
|
|
285
|
+
skipped: true,
|
|
286
|
+
html: null,
|
|
287
|
+
redirected: false,
|
|
288
|
+
};
|
|
289
|
+
}
|
|
290
|
+
|
|
291
|
+
if (response.statusCode !== 200 && ttl < MAX_TTL) {
|
|
250
292
|
return await fetchUrl(url, webUrl, parentUrl, ttl + 1);
|
|
251
293
|
}
|
|
252
294
|
|
|
@@ -254,12 +296,12 @@ async function fetchUrl(url, webUrl, parentUrl = undefined, ttl = 1) {
|
|
|
254
296
|
url,
|
|
255
297
|
parentUrl,
|
|
256
298
|
isImg: isImageUrl(url),
|
|
257
|
-
isWebPage: response.headers
|
|
299
|
+
isWebPage: response.headers["content-type"]?.includes("text/html") ?? true,
|
|
258
300
|
ttl,
|
|
259
|
-
status: response.
|
|
301
|
+
status: response.statusCode,
|
|
260
302
|
skipped: false,
|
|
261
|
-
html:
|
|
262
|
-
redirected: response.
|
|
303
|
+
html: response.body,
|
|
304
|
+
redirected: response.redirectUrls.length > 0,
|
|
263
305
|
};
|
|
264
306
|
} catch (e) {
|
|
265
307
|
const status = Number.parseInt((e && e.response && e.response.status) || -1, 10);
|
|
@@ -301,7 +343,7 @@ async function testUrl(url, webUrl, parentUrl = undefined) {
|
|
|
301
343
|
redirected: result.redirected,
|
|
302
344
|
indexable:
|
|
303
345
|
result.isWebPage && typeof result.html === "string"
|
|
304
|
-
? cheerio.load(result.html)("meta[name='robots']").attr("content")
|
|
346
|
+
? !cheerio.load(result.html)("meta[name='robots']").attr("content")?.includes("noindex")
|
|
305
347
|
: false,
|
|
306
348
|
canonicalUrl:
|
|
307
349
|
result.isWebPage && typeof result.html === "string"
|
|
@@ -590,25 +632,33 @@ module.exports = async function run(sitemapUrl, skip, withNested, withImages, ch
|
|
|
590
632
|
|
|
591
633
|
logInitialInfo(sitemapUrl, webUrl, withNested, withImages, checkMissing, shouldReportMissing);
|
|
592
634
|
|
|
593
|
-
|
|
594
|
-
|
|
595
|
-
|
|
596
|
-
|
|
635
|
+
try {
|
|
636
|
+
const startTime = performance.now();
|
|
637
|
+
const sitemapUrls = await Sitemap.getSitemap(sitemapUrl);
|
|
638
|
+
await testSitemapUrls(sitemapUrls, webUrl, sitemapUrl, skip, withNested, withImages);
|
|
639
|
+
const finishTime = performance.now();
|
|
597
640
|
|
|
598
|
-
|
|
641
|
+
const result = getResult(webUrl, sitemapUrls, shouldReportMissing);
|
|
599
642
|
|
|
600
|
-
|
|
601
|
-
|
|
643
|
+
if (result.errorsSum > 0) {
|
|
644
|
+
const chatMessage = logResultErrors(webUrl, result);
|
|
602
645
|
|
|
603
|
-
|
|
604
|
-
|
|
646
|
+
await sendGoogleChatMessage(chatMessage, googleWebhookUrl);
|
|
647
|
+
}
|
|
605
648
|
|
|
606
|
-
|
|
607
|
-
|
|
608
|
-
|
|
609
|
-
|
|
649
|
+
if (result.skippedUrls.length > 0) {
|
|
650
|
+
const skippedUrlsText = createSkippedResult(result.skippedUrls);
|
|
651
|
+
logErrors(skippedUrlsText, "\nSkipped origins:\n");
|
|
652
|
+
}
|
|
610
653
|
|
|
611
|
-
|
|
654
|
+
logStatistics(result.ok, Math.ceil(finishTime - startTime));
|
|
612
655
|
|
|
613
|
-
|
|
656
|
+
process.exit(result.errorsSum > 0 ? 1 : 0);
|
|
657
|
+
} catch (e) {
|
|
658
|
+
stdout.write("⛔ Error: " + e.message + "\n");
|
|
659
|
+
|
|
660
|
+
await sendGoogleChatMessage(`Sitemap check failed completely:\n\n${e.message}`, googleWebhookUrl);
|
|
661
|
+
|
|
662
|
+
process.exit(1);
|
|
663
|
+
}
|
|
614
664
|
};
|