@uxf/scripts 11.49.3 → 11.58.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +2 -1
- package/src/Sitemap.js +2 -2
- package/src/uxf-sitemap-check/cli.js +9 -16
- package/src/uxf-sitemap-check/index.js +210 -159
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@uxf/scripts",
|
|
3
|
-
"version": "11.49.3",
|
|
3
|
+
"version": "11.58.0",
|
|
4
4
|
"description": "",
|
|
5
5
|
"main": "index.js",
|
|
6
6
|
"bin": {
|
|
@@ -33,6 +33,7 @@
|
|
|
33
33
|
"dayjs": "1.11.13",
|
|
34
34
|
"fast-glob": "3.3.2",
|
|
35
35
|
"madge": "8.0.0",
|
|
36
|
+
"robots-txt-parser": "2.0.3",
|
|
36
37
|
"yargs": "17.7.2"
|
|
37
38
|
}
|
|
38
39
|
}
|
package/src/Sitemap.js
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
const https = require(
|
|
1
|
+
const https = require("https");
|
|
2
2
|
const { create } = require("axios");
|
|
3
3
|
const cheerio = require("cheerio");
|
|
4
4
|
|
|
@@ -18,7 +18,7 @@ async function getSitemap(xml) {
|
|
|
18
18
|
}
|
|
19
19
|
|
|
20
20
|
const agent = new https.Agent({
|
|
21
|
-
rejectUnauthorized: false
|
|
21
|
+
rejectUnauthorized: false,
|
|
22
22
|
});
|
|
23
23
|
|
|
24
24
|
const axios = create({
|
|
@@ -2,31 +2,20 @@ const { argv, env } = require("process");
|
|
|
2
2
|
|
|
3
3
|
module.exports = async () => {
|
|
4
4
|
const cli = require("yargs")
|
|
5
|
-
.command("$0", "UXF sitemap checker", yargs => {
|
|
5
|
+
.command("$0", "UXF sitemap checker", (yargs) => {
|
|
6
6
|
yargs.demandCommand(0, 0).usage(`UXF sitemap checker
|
|
7
7
|
Usage:
|
|
8
8
|
uxf-sitemap-check [options]
|
|
9
9
|
|
|
10
10
|
Environment variables:
|
|
11
11
|
HTTP_USERNAME - optional
|
|
12
|
-
HTTP_PASSWORD - optional
|
|
13
|
-
SLACK_TOKEN - optional`);
|
|
12
|
+
HTTP_PASSWORD - optional`);
|
|
14
13
|
})
|
|
15
14
|
.option("url", {
|
|
16
15
|
describe: "Sitemap url",
|
|
17
16
|
type: "string",
|
|
18
17
|
group: "Options",
|
|
19
18
|
})
|
|
20
|
-
.option("web-url", {
|
|
21
|
-
describe: "Web url for check",
|
|
22
|
-
type: "string",
|
|
23
|
-
group: "Options",
|
|
24
|
-
})
|
|
25
|
-
.option("slack-channel", {
|
|
26
|
-
describe: "Slack channel id.",
|
|
27
|
-
type: "string",
|
|
28
|
-
group: "Options",
|
|
29
|
-
})
|
|
30
19
|
.option("u", {
|
|
31
20
|
alias: "http-username",
|
|
32
21
|
describe: "Http authorization username (or use environment variable HTTP_USERNAME)",
|
|
@@ -44,11 +33,16 @@ Environment variables:
|
|
|
44
33
|
type: "string",
|
|
45
34
|
group: "Options",
|
|
46
35
|
})
|
|
47
|
-
.option("
|
|
36
|
+
.option("with-nested", {
|
|
48
37
|
describe: "If nested urls should be tested.",
|
|
49
38
|
type: "boolean",
|
|
50
39
|
group: "Options",
|
|
51
40
|
})
|
|
41
|
+
.option("with-images", {
|
|
42
|
+
describe: "If images should be tested.",
|
|
43
|
+
type: "boolean",
|
|
44
|
+
group: "Options",
|
|
45
|
+
})
|
|
52
46
|
.option("h", { alias: "help", group: "Options" })
|
|
53
47
|
.strict(false)
|
|
54
48
|
.exitProcess(false);
|
|
@@ -56,7 +50,6 @@ Environment variables:
|
|
|
56
50
|
try {
|
|
57
51
|
const { help, url, ...options } = cli.parse(argv.slice(2));
|
|
58
52
|
const skip = options.skip ? Number.parseInt(options.skip) : null;
|
|
59
|
-
const webUrl = options["web-url"] || null;
|
|
60
53
|
|
|
61
54
|
if (Boolean(help)) {
|
|
62
55
|
return 0;
|
|
@@ -67,7 +60,7 @@ Environment variables:
|
|
|
67
60
|
env.HTTP_PASSWORD = options["http-password"];
|
|
68
61
|
}
|
|
69
62
|
|
|
70
|
-
await require("./index")(url,
|
|
63
|
+
await require("./index")(url, skip, options["with-nested"], options["with-images"]);
|
|
71
64
|
} catch (e) {
|
|
72
65
|
console.error(e);
|
|
73
66
|
return 1;
|
|
@@ -1,23 +1,43 @@
|
|
|
1
|
-
const Slack = require("../Slack");
|
|
2
1
|
const Sitemap = require("../Sitemap");
|
|
3
2
|
const { performance } = require("perf_hooks");
|
|
4
|
-
const {
|
|
5
|
-
const { axios } = require("../Sitemap");
|
|
3
|
+
const { stdout } = require("process");
|
|
6
4
|
const cheerio = require("cheerio");
|
|
7
5
|
const GoogleChat = require("../GoogleChat");
|
|
6
|
+
const robotsTxtParser = require("robots-txt-parser");
|
|
7
|
+
|
|
8
|
+
const { HTTP_USERNAME, HTTP_PASSWORD } = process.env;
|
|
8
9
|
|
|
9
10
|
/**
|
|
10
|
-
*
|
|
11
|
-
* @typedef {{parentUrl: (string | undefined), isImg: boolean, time: number, ttl: number, url: string, status: number, message: (string | undefined), shouldIgnoreError: (boolean | undefined)}} UrlCheckResponse
|
|
11
|
+
* @typedef {{parentUrl: (string | undefined), isImg: boolean, ttl: number, url: string, status: number, message: (string | undefined), skipped: boolean, html: (string | null)}} UrlCheckResponse
|
|
12
12
|
*/
|
|
13
13
|
|
|
14
14
|
const MAX_TTL = 3;
|
|
15
|
-
const TESTED_URLS = [];
|
|
16
15
|
const IMAGES_LABEL = "🏞 Images:";
|
|
17
16
|
const URLS_LABEL = "🔗 Links:";
|
|
18
17
|
|
|
18
|
+
const TESTED_URLS = [];
|
|
19
|
+
const URLS_TO_CHECK = new Set();
|
|
20
|
+
|
|
21
|
+
const robotsParser = robotsTxtParser({ userAgent: "uxf-bot", allowOnNeutral: false });
|
|
22
|
+
|
|
23
|
+
/**
|
|
24
|
+
* @param url {string}
|
|
25
|
+
* @param options {{redirect: boolean, isExternal: boolean}}
|
|
26
|
+
* @returns {Promise<Response>}
|
|
27
|
+
*/
|
|
28
|
+
function fetcher(url, options) {
|
|
29
|
+
return fetch(url, {
|
|
30
|
+
credentials: "include",
|
|
31
|
+
headers:
|
|
32
|
+
!options.isExternal && HTTP_USERNAME && HTTP_PASSWORD
|
|
33
|
+
? new Headers({ Authorization: "Basic " + btoa(`${HTTP_USERNAME}:${HTTP_PASSWORD}`) })
|
|
34
|
+
: undefined,
|
|
35
|
+
redirect: options.redirect ? "follow" : "manual",
|
|
36
|
+
signal: AbortSignal.timeout(20_000),
|
|
37
|
+
});
|
|
38
|
+
}
|
|
39
|
+
|
|
19
40
|
/**
|
|
20
|
-
*
|
|
21
41
|
* @param length {number}
|
|
22
42
|
* @return {string}
|
|
23
43
|
*/
|
|
@@ -26,7 +46,6 @@ function createTabSpace(length = 1) {
|
|
|
26
46
|
}
|
|
27
47
|
|
|
28
48
|
/**
|
|
29
|
-
*
|
|
30
49
|
* @param url {string}
|
|
31
50
|
* @return {boolean}
|
|
32
51
|
*/
|
|
@@ -35,37 +54,19 @@ function isImageUrl(url) {
|
|
|
35
54
|
}
|
|
36
55
|
|
|
37
56
|
/**
|
|
38
|
-
*
|
|
39
|
-
* @param url {string}
|
|
40
|
-
* @param status {number}
|
|
41
|
-
* @param e {Error}
|
|
42
|
-
* @returns {boolean}
|
|
43
|
-
*/
|
|
44
|
-
function shouldIgnoreError(url, status, e) {
|
|
45
|
-
if (status === 999 && url.startsWith("https://www.linkedin.com")) {
|
|
46
|
-
return true;
|
|
47
|
-
}
|
|
48
|
-
if ((status === -1 || status === 302) && url.startsWith("https://www.facebook.com/sharer/")) {
|
|
49
|
-
return true;
|
|
50
|
-
}
|
|
51
|
-
if (status === -3) {
|
|
52
|
-
return true;
|
|
53
|
-
}
|
|
54
|
-
|
|
55
|
-
return false;
|
|
56
|
-
}
|
|
57
|
-
|
|
58
|
-
/**
|
|
59
|
-
*
|
|
60
57
|
* @param errors {UrlCheckResponse[]}
|
|
61
58
|
* @return {string}
|
|
62
59
|
*/
|
|
63
60
|
function createErrorList(errors) {
|
|
64
|
-
return errors
|
|
61
|
+
return errors
|
|
62
|
+
.map(
|
|
63
|
+
(err) =>
|
|
64
|
+
`${createTabSpace(3)}${err.url}${createTabSpace()}${err.status}${err.message ? ` – ${err.message}` : ""}`,
|
|
65
|
+
)
|
|
66
|
+
.join("\n");
|
|
65
67
|
}
|
|
66
68
|
|
|
67
69
|
/**
|
|
68
|
-
*
|
|
69
70
|
* @param errors {UrlCheckResponse[]}
|
|
70
71
|
* @return {string}
|
|
71
72
|
*/
|
|
@@ -73,13 +74,13 @@ function createErrorResult(errors) {
|
|
|
73
74
|
let parentPages = "";
|
|
74
75
|
let nestedPages = "";
|
|
75
76
|
|
|
76
|
-
const parentPagesErrors = errors.filter(url => url.parentUrl === undefined);
|
|
77
|
+
const parentPagesErrors = errors.filter((url) => url.parentUrl === undefined);
|
|
77
78
|
if (parentPagesErrors.length > 0) {
|
|
78
79
|
parentPages = `${createTabSpace()}Pages from sitemap:\n${createErrorList(parentPagesErrors)}\n`;
|
|
79
80
|
}
|
|
80
81
|
|
|
81
82
|
const nestedPagesErrors = errors
|
|
82
|
-
.filter(url => url.parentUrl !== undefined)
|
|
83
|
+
.filter((url) => url.parentUrl !== undefined)
|
|
83
84
|
.sort((prev, curr) => prev.parentUrl.localeCompare(curr.parentUrl));
|
|
84
85
|
for (let i = 0; i < nestedPagesErrors.length; i++) {
|
|
85
86
|
if (i === 0) {
|
|
@@ -92,8 +93,8 @@ function createErrorResult(errors) {
|
|
|
92
93
|
nestedPages += `${createTabSpace(1)}Page: ${nestedPagesErrors[i].parentUrl}\n`;
|
|
93
94
|
}
|
|
94
95
|
}
|
|
95
|
-
const images = nestedPagesErrors.filter(err => err.parentUrl === nestedPagesErrors[i].parentUrl && err.isImg);
|
|
96
|
-
const links = nestedPagesErrors.filter(err => err.parentUrl === nestedPagesErrors[i].parentUrl && !err.isImg);
|
|
96
|
+
const images = nestedPagesErrors.filter((err) => err.parentUrl === nestedPagesErrors[i].parentUrl && err.isImg);
|
|
97
|
+
const links = nestedPagesErrors.filter((err) => err.parentUrl === nestedPagesErrors[i].parentUrl && !err.isImg);
|
|
97
98
|
if (images.length > 0) {
|
|
98
99
|
nestedPages += `${createTabSpace(2)}${IMAGES_LABEL}\n${createErrorList(images)}\n`;
|
|
99
100
|
}
|
|
@@ -106,7 +107,21 @@ function createErrorResult(errors) {
|
|
|
106
107
|
}
|
|
107
108
|
|
|
108
109
|
/**
|
|
109
|
-
*
|
|
110
|
+
* @param skippedUrls {UrlCheckResponse[]}
|
|
111
|
+
* @return {string}
|
|
112
|
+
*/
|
|
113
|
+
function createSkippedResult(skippedUrls) {
|
|
114
|
+
const flattened = skippedUrls.flatMap((url) => new URL(url.url).origin);
|
|
115
|
+
const unique = [...new Set(flattened)];
|
|
116
|
+
|
|
117
|
+
if (unique.length > 0) {
|
|
118
|
+
return `${unique.map((err) => `${createTabSpace(2)}${err}`).join("\n")}\n`;
|
|
119
|
+
}
|
|
120
|
+
|
|
121
|
+
return "";
|
|
122
|
+
}
|
|
123
|
+
|
|
124
|
+
/**
|
|
110
125
|
* @param incorrectLinks {string[]}
|
|
111
126
|
* @param webUrl {string}
|
|
112
127
|
* @return {string[]}
|
|
@@ -122,23 +137,83 @@ function createCorrectLinks(incorrectLinks, webUrl) {
|
|
|
122
137
|
}
|
|
123
138
|
|
|
124
139
|
/**
|
|
125
|
-
*
|
|
126
140
|
* @param url {string}
|
|
141
|
+
* @returns {boolean}
|
|
142
|
+
*/
|
|
143
|
+
function validURL(url) {
|
|
144
|
+
try {
|
|
145
|
+
new URL(url);
|
|
146
|
+
|
|
147
|
+
return true;
|
|
148
|
+
} catch {
|
|
149
|
+
return false;
|
|
150
|
+
}
|
|
151
|
+
}
|
|
152
|
+
|
|
153
|
+
/**
|
|
154
|
+
* @param url {string}
|
|
155
|
+
* @param webUrl {string}
|
|
127
156
|
* @param parentUrl {string | undefined}
|
|
128
157
|
* @param ttl {number}
|
|
129
158
|
* @return {Promise<UrlCheckResponse>}
|
|
130
159
|
*/
|
|
131
|
-
async function fetchUrl(url, parentUrl = undefined, ttl = 1) {
|
|
160
|
+
async function fetchUrl(url, webUrl, parentUrl = undefined, ttl = 1) {
|
|
161
|
+
if (!validURL(url)) {
|
|
162
|
+
return {
|
|
163
|
+
url,
|
|
164
|
+
parentUrl,
|
|
165
|
+
isImg: isImageUrl(url),
|
|
166
|
+
ttl,
|
|
167
|
+
status: 0,
|
|
168
|
+
message: "invalid url: " + url,
|
|
169
|
+
skipped: false,
|
|
170
|
+
html: null,
|
|
171
|
+
};
|
|
172
|
+
}
|
|
173
|
+
|
|
132
174
|
try {
|
|
133
|
-
|
|
134
|
-
|
|
175
|
+
const origin = new URL(url).origin;
|
|
176
|
+
|
|
177
|
+
if (parentUrl && origin !== webUrl) {
|
|
178
|
+
await robotsParser.useRobotsFor(origin);
|
|
179
|
+
|
|
180
|
+
const canCrawl = await robotsParser.canCrawl(url);
|
|
181
|
+
|
|
182
|
+
if (!canCrawl) {
|
|
183
|
+
return {
|
|
184
|
+
url,
|
|
185
|
+
parentUrl,
|
|
186
|
+
isImg: isImageUrl(url),
|
|
187
|
+
ttl,
|
|
188
|
+
status: 0,
|
|
189
|
+
message: "blocked by robots.txt",
|
|
190
|
+
skipped: true,
|
|
191
|
+
html: null,
|
|
192
|
+
};
|
|
193
|
+
}
|
|
194
|
+
}
|
|
195
|
+
} catch (e) {
|
|
196
|
+
const errorStatus = await fetcher(new URL(url).origin + "/robots.txt", { isExternal: true, redirect: true })
|
|
197
|
+
.then((res) => (res.status === 200 ? -1 : res.status))
|
|
198
|
+
.catch((e) => e.response?.status);
|
|
135
199
|
|
|
136
|
-
|
|
137
|
-
|
|
138
|
-
|
|
200
|
+
return {
|
|
201
|
+
url,
|
|
202
|
+
parentUrl,
|
|
203
|
+
isImg: isImageUrl(url),
|
|
204
|
+
ttl,
|
|
205
|
+
status: errorStatus,
|
|
206
|
+
message: e.message,
|
|
207
|
+
skipped: errorStatus !== undefined,
|
|
208
|
+
html: null,
|
|
209
|
+
};
|
|
210
|
+
}
|
|
139
211
|
|
|
140
|
-
|
|
141
|
-
|
|
212
|
+
try {
|
|
213
|
+
const response = await fetcher(url, { redirect: !!parentUrl, isExternal: !url.includes(webUrl) });
|
|
214
|
+
|
|
215
|
+
if (response.status !== 200 && ttl < MAX_TTL) {
|
|
216
|
+
return await fetchUrl(url, webUrl, parentUrl, ttl + 1);
|
|
142
217
|
}
|
|
143
218
|
|
|
144
219
|
return {
|
|
@@ -146,46 +221,36 @@ async function fetchUrl(url, parentUrl = undefined, ttl = 1) {
|
|
|
146
221
|
parentUrl,
|
|
147
222
|
isImg: isImageUrl(url),
|
|
148
223
|
ttl,
|
|
149
|
-
status,
|
|
150
|
-
|
|
224
|
+
status: response.status,
|
|
225
|
+
skipped: false,
|
|
226
|
+
html: await response.text(),
|
|
151
227
|
};
|
|
152
228
|
} catch (e) {
|
|
153
229
|
const status = Number.parseInt((e && e.response && e.response.status) || -1, 10);
|
|
154
230
|
|
|
155
|
-
if (shouldIgnoreError(url, status, e)) {
|
|
156
|
-
return {
|
|
157
|
-
url,
|
|
158
|
-
parentUrl,
|
|
159
|
-
isImg: isImageUrl(url),
|
|
160
|
-
ttl,
|
|
161
|
-
status,
|
|
162
|
-
shouldIgnoreError: true,
|
|
163
|
-
time: 0,
|
|
164
|
-
};
|
|
165
|
-
}
|
|
166
231
|
return {
|
|
167
232
|
url,
|
|
168
233
|
parentUrl,
|
|
169
234
|
isImg: isImageUrl(url),
|
|
170
235
|
ttl,
|
|
171
236
|
status,
|
|
172
|
-
time: 0,
|
|
173
237
|
message: e.message,
|
|
174
|
-
|
|
238
|
+
skipped: false,
|
|
239
|
+
html: null,
|
|
175
240
|
};
|
|
176
241
|
}
|
|
177
242
|
}
|
|
178
243
|
|
|
179
244
|
/**
|
|
180
|
-
*
|
|
181
245
|
* @param url {string}
|
|
246
|
+
* @param webUrl {string}
|
|
182
247
|
* @param parentUrl {string | undefined}
|
|
183
248
|
* @return {UrlCheckResponse}
|
|
184
249
|
*/
|
|
185
|
-
async function testUrl(url, parentUrl = undefined) {
|
|
186
|
-
const indexInChecked = TESTED_URLS.findIndex(result => result.url === url);
|
|
250
|
+
async function testUrl(url, webUrl, parentUrl = undefined) {
|
|
251
|
+
const indexInChecked = TESTED_URLS.findIndex((result) => result.url === url);
|
|
187
252
|
if (indexInChecked === -1) {
|
|
188
|
-
const result = await fetchUrl(url, parentUrl);
|
|
253
|
+
const result = await fetchUrl(url, webUrl, parentUrl);
|
|
189
254
|
TESTED_URLS.push(result);
|
|
190
255
|
return result;
|
|
191
256
|
}
|
|
@@ -193,99 +258,109 @@ async function testUrl(url, parentUrl = undefined) {
|
|
|
193
258
|
}
|
|
194
259
|
|
|
195
260
|
/**
|
|
196
|
-
*
|
|
197
261
|
* @param urls {string[]}
|
|
198
262
|
* @param webUrl {string}
|
|
199
263
|
* @param sitemapUrl {string}
|
|
200
264
|
* @param skip {number}
|
|
201
|
-
* @param
|
|
265
|
+
* @param withNested {boolean}
|
|
266
|
+
* @param withImages {boolean}
|
|
202
267
|
* @return {Promise<void>}
|
|
203
268
|
*/
|
|
204
|
-
async function testSitemapUrls(urls, webUrl, sitemapUrl, skip,
|
|
269
|
+
async function testSitemapUrls(urls, webUrl, sitemapUrl, skip, withNested, withImages) {
|
|
270
|
+
for (let i = skip || 0; i < urls.length; i++) {
|
|
271
|
+
URLS_TO_CHECK.add(urls[i]);
|
|
272
|
+
}
|
|
273
|
+
|
|
205
274
|
for (let i = skip || 0; i < urls.length; i++) {
|
|
206
275
|
const url = urls[i];
|
|
207
|
-
const changedUrl = webUrl ? `${webUrl}${new URL(url).pathname}` :
|
|
276
|
+
const changedUrl = webUrl ? `${webUrl}${new URL(url).pathname}` : url;
|
|
208
277
|
|
|
209
|
-
|
|
278
|
+
const result = await testUrl(changedUrl, webUrl);
|
|
279
|
+
printProgress();
|
|
210
280
|
|
|
211
|
-
|
|
212
|
-
|
|
281
|
+
if (withNested && result.status === 200) {
|
|
282
|
+
await testNestedUrls(result.html, changedUrl, i, webUrl ?? sitemapUrl.split("/").slice(0, 3).join("/"));
|
|
283
|
+
}
|
|
213
284
|
|
|
214
|
-
if (
|
|
215
|
-
await
|
|
285
|
+
if (withImages && result.status === 200) {
|
|
286
|
+
await testNestedImages(result.html, changedUrl, i, webUrl ?? sitemapUrl.split("/").slice(0, 3).join("/"));
|
|
216
287
|
}
|
|
217
288
|
}
|
|
218
289
|
}
|
|
219
290
|
|
|
220
291
|
/**
|
|
221
|
-
*
|
|
292
|
+
* @param html {string}
|
|
222
293
|
* @param parentUrl {string}
|
|
223
294
|
* @param parentIndex {number}
|
|
224
295
|
* @param webUrl {string}
|
|
225
296
|
* @return {Promise<void>}
|
|
226
297
|
*/
|
|
227
|
-
async function
|
|
228
|
-
const
|
|
229
|
-
const $ = cheerio.load(data);
|
|
298
|
+
async function testNestedUrls(html, parentUrl, parentIndex, webUrl) {
|
|
299
|
+
const $ = cheerio.load(html);
|
|
230
300
|
const urls = createCorrectLinks(
|
|
231
301
|
$("a[href]").map((i, node) => $(node).attr("href")),
|
|
232
302
|
webUrl,
|
|
233
303
|
);
|
|
304
|
+
|
|
305
|
+
await testNested(urls, parentIndex, parentUrl, createTabSpace() + URLS_LABEL, webUrl);
|
|
306
|
+
}
|
|
307
|
+
|
|
308
|
+
/**
|
|
309
|
+
* @param html {string}
|
|
310
|
+
* @param parentUrl {string}
|
|
311
|
+
* @param parentIndex {number}
|
|
312
|
+
* @param webUrl {string}
|
|
313
|
+
* @return {Promise<void>}
|
|
314
|
+
*/
|
|
315
|
+
async function testNestedImages(html, parentUrl, parentIndex, webUrl) {
|
|
316
|
+
const $ = cheerio.load(html);
|
|
234
317
|
const images = createCorrectLinks(
|
|
235
318
|
$("img[src]").map((i, node) => $(node).attr("src")),
|
|
236
319
|
webUrl,
|
|
237
320
|
);
|
|
238
321
|
|
|
239
|
-
await testNested(images, parentIndex, parentUrl, createTabSpace() + IMAGES_LABEL);
|
|
240
|
-
await testNested(urls, parentIndex, parentUrl, createTabSpace() + URLS_LABEL);
|
|
322
|
+
await testNested(images, parentIndex, parentUrl, createTabSpace() + IMAGES_LABEL, webUrl);
|
|
241
323
|
}
|
|
242
324
|
|
|
243
325
|
/**
|
|
244
|
-
*
|
|
245
326
|
* @param urls {string[]}
|
|
246
327
|
* @param parentIndex {number}
|
|
247
328
|
* @param parentUrl {string}
|
|
248
329
|
* @param label {string}
|
|
330
|
+
* @param webUrl {string}
|
|
249
331
|
* @return {Promise<void>}
|
|
250
332
|
*/
|
|
251
|
-
async function testNested(urls, parentIndex, parentUrl, label) {
|
|
333
|
+
async function testNested(urls, parentIndex, parentUrl, label, webUrl) {
|
|
252
334
|
if (urls.length === 0) {
|
|
253
335
|
return;
|
|
254
336
|
}
|
|
255
337
|
|
|
256
|
-
|
|
338
|
+
for (let i = 0 || 0; i < urls.length; i++) {
|
|
339
|
+
URLS_TO_CHECK.add(urls[i]);
|
|
340
|
+
}
|
|
341
|
+
|
|
257
342
|
for (let i = 0; i < urls.length; i++) {
|
|
258
|
-
if (TESTED_URLS.findIndex(result => result.url === urls[i]) !== -1) {
|
|
343
|
+
if (TESTED_URLS.findIndex((result) => result.url === urls[i]) !== -1) {
|
|
259
344
|
continue;
|
|
260
345
|
}
|
|
261
|
-
|
|
262
|
-
|
|
263
|
-
printUrlResult(result);
|
|
346
|
+
await testUrl(urls[i], webUrl, parentUrl);
|
|
347
|
+
printProgress();
|
|
264
348
|
}
|
|
265
349
|
}
|
|
266
350
|
|
|
267
|
-
|
|
268
|
-
|
|
269
|
-
|
|
270
|
-
|
|
271
|
-
|
|
272
|
-
|
|
273
|
-
|
|
274
|
-
|
|
275
|
-
|
|
276
|
-
}
|
|
277
|
-
|
|
278
|
-
/**
|
|
279
|
-
*
|
|
280
|
-
* @param result {UrlCheckResponse}
|
|
281
|
-
*/
|
|
282
|
-
function printUrlResult(result) {
|
|
283
|
-
const { ttl, status, time, message } = result;
|
|
284
|
-
stdout.write(`${createTabSpace()}${status}${message ? " – " + message : ""} (${time}ms) ttl=${ttl} ${status === 200 ? "✅ " : "❌ "}\n`);
|
|
351
|
+
let lastLogTime = 0;
|
|
352
|
+
function printProgress() {
|
|
353
|
+
const now = Date.now();
|
|
354
|
+
// Only log every 5 seconds in non-TTY
|
|
355
|
+
if (now - lastLogTime > 5000) {
|
|
356
|
+
stdout.write(
|
|
357
|
+
`Progress: ${TESTED_URLS.length}/${URLS_TO_CHECK.size} completed, ${TESTED_URLS.filter((u) => u.skipped).length} skipped, ${TESTED_URLS.filter((u) => u.status !== 200 && u.skipped === false).length} errors\n`,
|
|
358
|
+
);
|
|
359
|
+
lastLogTime = now;
|
|
360
|
+
}
|
|
285
361
|
}
|
|
286
362
|
|
|
287
363
|
/**
|
|
288
|
-
*
|
|
289
364
|
* @param errorText {string}
|
|
290
365
|
* @param title {string}
|
|
291
366
|
*/
|
|
@@ -295,7 +370,6 @@ function logErrors(errorText, title) {
|
|
|
295
370
|
}
|
|
296
371
|
|
|
297
372
|
/**
|
|
298
|
-
*
|
|
299
373
|
* @param millis {number}
|
|
300
374
|
* @return {string}
|
|
301
375
|
*/
|
|
@@ -306,105 +380,82 @@ function convertTime(millis) {
|
|
|
306
380
|
}
|
|
307
381
|
|
|
308
382
|
/**
|
|
309
|
-
*
|
|
310
383
|
* @param okResults {UrlCheckResponse[]}
|
|
311
384
|
* @param time {number}
|
|
312
385
|
*/
|
|
313
386
|
function logStatistics(okResults, time) {
|
|
314
|
-
const avgTime = Math.round(okResults.reduce((prev, curr) => prev + curr.time, 0) / TESTED_URLS.length);
|
|
315
|
-
const maxTime = okResults.reduce((prev, curr) => (curr.time > prev.time ? curr : prev), []);
|
|
316
|
-
const minTime = okResults.reduce((prev, curr) => (curr.time < prev.time ? curr : prev), []);
|
|
317
|
-
|
|
318
387
|
stdout.write("\nSummary:\n");
|
|
319
388
|
stdout.write(createTabSpace() + `Time ${convertTime(time)}\n`);
|
|
320
389
|
stdout.write(
|
|
321
|
-
createTabSpace() + "Images tested:" + createTabSpace() + TESTED_URLS.filter(url => url.isImg).length + "\n",
|
|
390
|
+
createTabSpace() + "Images tested:" + createTabSpace() + TESTED_URLS.filter((url) => url.isImg).length + "\n",
|
|
322
391
|
);
|
|
323
392
|
stdout.write(
|
|
324
|
-
createTabSpace() + "Links tested:" + createTabSpace() + TESTED_URLS.filter(url => !url.isImg).length + "\n",
|
|
393
|
+
createTabSpace() + "Links tested:" + createTabSpace() + TESTED_URLS.filter((url) => !url.isImg).length + "\n",
|
|
325
394
|
);
|
|
326
|
-
stdout.write(createTabSpace() + "Avg time:" + createTabSpace() + avgTime + "ms\n");
|
|
327
395
|
stdout.write(
|
|
328
|
-
createTabSpace() + "
|
|
396
|
+
createTabSpace() + "Skipped:" + createTabSpace() + TESTED_URLS.filter((url) => url.skipped).length + "\n",
|
|
329
397
|
);
|
|
330
398
|
stdout.write(
|
|
331
|
-
createTabSpace() +
|
|
399
|
+
createTabSpace() +
|
|
400
|
+
"Errors:" +
|
|
401
|
+
createTabSpace() +
|
|
402
|
+
TESTED_URLS.filter((url) => url.status !== 200 && url.skipped === false).length +
|
|
403
|
+
"\n",
|
|
332
404
|
);
|
|
333
405
|
}
|
|
334
406
|
|
|
335
407
|
/**
|
|
336
|
-
*
|
|
337
|
-
* @param errorText {string}
|
|
338
|
-
* @param slackChannel {string}
|
|
339
|
-
* @return {Promise<void>}
|
|
340
|
-
*/
|
|
341
|
-
async function sendSlackMessage(errorText, slackChannel) {
|
|
342
|
-
await Slack.chatPostMessage(slackChannel, {
|
|
343
|
-
text: ":warning: Odkazy uvedené v sitemap.xml nejsou dostupné",
|
|
344
|
-
attachments: [
|
|
345
|
-
{
|
|
346
|
-
text: errorText,
|
|
347
|
-
},
|
|
348
|
-
],
|
|
349
|
-
});
|
|
350
|
-
}
|
|
351
|
-
|
|
352
|
-
/**
|
|
353
|
-
*
|
|
354
408
|
* @param resultErrors {string}
|
|
355
409
|
* @return {Promise<void>}
|
|
356
410
|
*/
|
|
357
411
|
async function sendGoogleChatMessage(resultErrors) {
|
|
358
412
|
await GoogleChat.chatPostMessage({
|
|
359
|
-
text: resultErrors
|
|
413
|
+
text: resultErrors,
|
|
360
414
|
});
|
|
361
415
|
}
|
|
362
416
|
|
|
363
417
|
/**
|
|
364
|
-
*
|
|
365
418
|
* @param sitemapUrl {string}
|
|
366
|
-
* @param webUrl {string}
|
|
367
|
-
* @param slackChannel {string}
|
|
368
419
|
* @param skip {number}
|
|
369
|
-
* @param
|
|
420
|
+
* @param withNested {boolean}
|
|
421
|
+
* @param withImages {boolean}
|
|
370
422
|
* @return {Promise<*>}
|
|
371
423
|
*/
|
|
372
|
-
module.exports = async function run(sitemapUrl,
|
|
424
|
+
module.exports = async function run(sitemapUrl, skip, withNested, withImages) {
|
|
373
425
|
if (!sitemapUrl) {
|
|
374
426
|
stdout.write("⛔ Required parameter --url is empty.\n");
|
|
375
427
|
return process.exit(1);
|
|
376
428
|
}
|
|
377
429
|
|
|
378
|
-
|
|
379
|
-
|
|
380
|
-
return process.exit(1);
|
|
381
|
-
}
|
|
430
|
+
const url = new URL(sitemapUrl);
|
|
431
|
+
const webUrl = url.origin;
|
|
382
432
|
|
|
383
|
-
|
|
384
|
-
|
|
385
|
-
|
|
433
|
+
stdout.write(`${createTabSpace()}Sitemap url: ${sitemapUrl}\n`);
|
|
434
|
+
stdout.write(`${createTabSpace()}Web url: ${webUrl}\n\n`);
|
|
435
|
+
if (withNested) {
|
|
436
|
+
stdout.write(`${createTabSpace()}Will test nested links\n`);
|
|
437
|
+
}
|
|
438
|
+
if (withImages) {
|
|
439
|
+
stdout.write(`${createTabSpace()}Will test images\n\n`);
|
|
386
440
|
}
|
|
387
441
|
|
|
388
442
|
const startTime = performance.now();
|
|
389
|
-
await testSitemapUrls(
|
|
443
|
+
await testSitemapUrls(await Sitemap.getSitemap(sitemapUrl), webUrl, sitemapUrl, skip, withNested, withImages);
|
|
390
444
|
const finishTime = performance.now();
|
|
391
445
|
|
|
392
|
-
const errors = TESTED_URLS.filter(r => r.status !== 200 && r.
|
|
393
|
-
const
|
|
394
|
-
const ok = TESTED_URLS.filter(r => r.status === 200);
|
|
446
|
+
const errors = TESTED_URLS.filter((r) => r.status !== 200 && r.skipped === false);
|
|
447
|
+
const skippedUrls = TESTED_URLS.filter((r) => r.status !== 200 && r.skipped === true);
|
|
448
|
+
const ok = TESTED_URLS.filter((r) => r.status === 200);
|
|
395
449
|
|
|
396
450
|
if (errors.length > 0) {
|
|
397
451
|
const errorText = createErrorResult(errors);
|
|
398
|
-
logErrors(errorText, "\nErrors:\n");
|
|
399
|
-
const ignoredErrorText = createErrorResult(ignoredErrors);
|
|
400
|
-
logErrors(ignoredErrorText, "\nIngored errors:\n");
|
|
401
|
-
await sendSlackMessage(errorText, slackChannel);
|
|
452
|
+
logErrors(errorText, "\n\n\nErrors:\n");
|
|
402
453
|
await sendGoogleChatMessage(errorText);
|
|
403
454
|
}
|
|
404
455
|
|
|
405
|
-
if (
|
|
406
|
-
const
|
|
407
|
-
logErrors(
|
|
456
|
+
if (skippedUrls.length > 0) {
|
|
457
|
+
const skippedUrlsText = createSkippedResult(skippedUrls);
|
|
458
|
+
logErrors(skippedUrlsText, "\nSkipped origins:\n");
|
|
408
459
|
}
|
|
409
460
|
|
|
410
461
|
logStatistics(ok, Math.ceil(finishTime - startTime));
|