@uxf/scripts 11.47.0 → 11.53.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +2 -1
- package/src/GoogleChat.js +6 -1
- package/src/Sitemap.js +2 -2
- package/src/uxf-sitemap-check/cli.js +9 -16
- package/src/uxf-sitemap-check/index.js +206 -158
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@uxf/scripts",
|
|
3
|
-
"version": "11.47.0",
|
|
3
|
+
"version": "11.53.0",
|
|
4
4
|
"description": "",
|
|
5
5
|
"main": "index.js",
|
|
6
6
|
"bin": {
|
|
@@ -33,6 +33,7 @@
|
|
|
33
33
|
"dayjs": "1.11.13",
|
|
34
34
|
"fast-glob": "3.3.2",
|
|
35
35
|
"madge": "8.0.0",
|
|
36
|
+
"robots-txt-parser": "2.0.3",
|
|
36
37
|
"yargs": "17.7.2"
|
|
37
38
|
}
|
|
38
39
|
}
|
package/src/GoogleChat.js
CHANGED
|
@@ -9,7 +9,12 @@ const axios = create({});
|
|
|
9
9
|
*/
|
|
10
10
|
async function chatPostMessage(data, dryRun) {
|
|
11
11
|
if (env.GOOGLE_WEBHOOK_URL && !dryRun) {
|
|
12
|
-
|
|
12
|
+
try {
|
|
13
|
+
await axios.post(env.GOOGLE_WEBHOOK_URL, { ...data });
|
|
14
|
+
} catch (error) {
|
|
15
|
+
process.stderr.write("GOOGLE CHAT: chat.postMessage - error");
|
|
16
|
+
console.error(error);
|
|
17
|
+
}
|
|
13
18
|
} else {
|
|
14
19
|
process.stdout.write("GOOGLE CHAT: chat.postMessage - skipped");
|
|
15
20
|
}
|
package/src/Sitemap.js
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
const https = require(
|
|
1
|
+
const https = require("https");
|
|
2
2
|
const { create } = require("axios");
|
|
3
3
|
const cheerio = require("cheerio");
|
|
4
4
|
|
|
@@ -18,7 +18,7 @@ async function getSitemap(xml) {
|
|
|
18
18
|
}
|
|
19
19
|
|
|
20
20
|
const agent = new https.Agent({
|
|
21
|
-
rejectUnauthorized: false
|
|
21
|
+
rejectUnauthorized: false,
|
|
22
22
|
});
|
|
23
23
|
|
|
24
24
|
const axios = create({
|
package/src/uxf-sitemap-check/cli.js
CHANGED
|
@@ -2,31 +2,20 @@ const { argv, env } = require("process");
|
|
|
2
2
|
|
|
3
3
|
module.exports = async () => {
|
|
4
4
|
const cli = require("yargs")
|
|
5
|
-
.command("$0", "UXF sitemap checker", yargs => {
|
|
5
|
+
.command("$0", "UXF sitemap checker", (yargs) => {
|
|
6
6
|
yargs.demandCommand(0, 0).usage(`UXF sitemap checker
|
|
7
7
|
Usage:
|
|
8
8
|
uxf-sitemap-check [options]
|
|
9
9
|
|
|
10
10
|
Environment variables:
|
|
11
11
|
HTTP_USERNAME - optional
|
|
12
|
-
HTTP_PASSWORD - optional
|
|
13
|
-
SLACK_TOKEN - optional`);
|
|
12
|
+
HTTP_PASSWORD - optional`);
|
|
14
13
|
})
|
|
15
14
|
.option("url", {
|
|
16
15
|
describe: "Sitemap url",
|
|
17
16
|
type: "string",
|
|
18
17
|
group: "Options",
|
|
19
18
|
})
|
|
20
|
-
.option("web-url", {
|
|
21
|
-
describe: "Web url for check",
|
|
22
|
-
type: "string",
|
|
23
|
-
group: "Options",
|
|
24
|
-
})
|
|
25
|
-
.option("slack-channel", {
|
|
26
|
-
describe: "Slack channel id.",
|
|
27
|
-
type: "string",
|
|
28
|
-
group: "Options",
|
|
29
|
-
})
|
|
30
19
|
.option("u", {
|
|
31
20
|
alias: "http-username",
|
|
32
21
|
describe: "Http authorization username (or use environment variable HTTP_USERNAME)",
|
|
@@ -44,11 +33,16 @@ Environment variables:
|
|
|
44
33
|
type: "string",
|
|
45
34
|
group: "Options",
|
|
46
35
|
})
|
|
47
|
-
.option("
|
|
36
|
+
.option("with-nested", {
|
|
48
37
|
describe: "If nested urls should be tested.",
|
|
49
38
|
type: "boolean",
|
|
50
39
|
group: "Options",
|
|
51
40
|
})
|
|
41
|
+
.option("with-images", {
|
|
42
|
+
describe: "If images should be tested.",
|
|
43
|
+
type: "boolean",
|
|
44
|
+
group: "Options",
|
|
45
|
+
})
|
|
52
46
|
.option("h", { alias: "help", group: "Options" })
|
|
53
47
|
.strict(false)
|
|
54
48
|
.exitProcess(false);
|
|
@@ -56,7 +50,6 @@ Environment variables:
|
|
|
56
50
|
try {
|
|
57
51
|
const { help, url, ...options } = cli.parse(argv.slice(2));
|
|
58
52
|
const skip = options.skip ? Number.parseInt(options.skip) : null;
|
|
59
|
-
const webUrl = options["web-url"] || null;
|
|
60
53
|
|
|
61
54
|
if (Boolean(help)) {
|
|
62
55
|
return 0;
|
|
@@ -67,7 +60,7 @@ Environment variables:
|
|
|
67
60
|
env.HTTP_PASSWORD = options["http-password"];
|
|
68
61
|
}
|
|
69
62
|
|
|
70
|
-
await require("./index")(url,
|
|
63
|
+
await require("./index")(url, skip, options["with-nested"], options["with-images"]);
|
|
71
64
|
} catch (e) {
|
|
72
65
|
console.error(e);
|
|
73
66
|
return 1;
|
package/src/uxf-sitemap-check/index.js
CHANGED
|
@@ -1,23 +1,43 @@
|
|
|
1
|
-
const Slack = require("../Slack");
|
|
2
1
|
const Sitemap = require("../Sitemap");
|
|
3
2
|
const { performance } = require("perf_hooks");
|
|
4
|
-
const {
|
|
5
|
-
const { axios } = require("../Sitemap");
|
|
3
|
+
const { stdout } = require("process");
|
|
6
4
|
const cheerio = require("cheerio");
|
|
7
5
|
const GoogleChat = require("../GoogleChat");
|
|
6
|
+
const robotsTxtParser = require("robots-txt-parser");
|
|
7
|
+
|
|
8
|
+
const { HTTP_USERNAME, HTTP_PASSWORD } = process.env;
|
|
8
9
|
|
|
9
10
|
/**
|
|
10
|
-
*
|
|
11
|
-
* @typedef {{parentUrl: (string | undefined), isImg: boolean, time: number, ttl: number, url: string, status: number, message: (string | undefined), shouldIgnoreError: (boolean | undefined)}} UrlCheckResponse
|
|
11
|
+
* @typedef {{parentUrl: (string | undefined), isImg: boolean, ttl: number, url: string, status: number, message: (string | undefined), skipped: boolean, html: (string | null)}} UrlCheckResponse
|
|
12
12
|
*/
|
|
13
13
|
|
|
14
14
|
const MAX_TTL = 3;
|
|
15
|
-
const TESTED_URLS = [];
|
|
16
15
|
const IMAGES_LABEL = "🏞 Images:";
|
|
17
16
|
const URLS_LABEL = "🔗 Links:";
|
|
18
17
|
|
|
18
|
+
const TESTED_URLS = [];
|
|
19
|
+
const URLS_TO_CHECK = new Set();
|
|
20
|
+
|
|
21
|
+
const robotsParser = robotsTxtParser({ userAgent: "uxf-bot", allowOnNeutral: false });
|
|
22
|
+
|
|
23
|
+
/**
|
|
24
|
+
* @param url {string}
|
|
25
|
+
* @param options {{redirect: boolean, isExternal: boolean}}
|
|
26
|
+
* @returns {Promise<Response>}
|
|
27
|
+
*/
|
|
28
|
+
function fetcher(url, options) {
|
|
29
|
+
return fetch(url, {
|
|
30
|
+
credentials: "include",
|
|
31
|
+
headers:
|
|
32
|
+
!options.isExternal && HTTP_USERNAME && HTTP_PASSWORD
|
|
33
|
+
? new Headers({ Authorization: "Basic " + btoa(`${HTTP_USERNAME}:${HTTP_PASSWORD}`) })
|
|
34
|
+
: undefined,
|
|
35
|
+
redirect: options.redirect ? "follow" : "manual",
|
|
36
|
+
signal: AbortSignal.timeout(20_000),
|
|
37
|
+
});
|
|
38
|
+
}
|
|
39
|
+
|
|
19
40
|
/**
|
|
20
|
-
*
|
|
21
41
|
* @param length {number}
|
|
22
42
|
* @return {string}
|
|
23
43
|
*/
|
|
@@ -26,7 +46,6 @@ function createTabSpace(length = 1) {
|
|
|
26
46
|
}
|
|
27
47
|
|
|
28
48
|
/**
|
|
29
|
-
*
|
|
30
49
|
* @param url {string}
|
|
31
50
|
* @return {boolean}
|
|
32
51
|
*/
|
|
@@ -35,37 +54,19 @@ function isImageUrl(url) {
|
|
|
35
54
|
}
|
|
36
55
|
|
|
37
56
|
/**
|
|
38
|
-
*
|
|
39
|
-
* @param url {string}
|
|
40
|
-
* @param status {number}
|
|
41
|
-
* @param e {Error}
|
|
42
|
-
* @returns {boolean}
|
|
43
|
-
*/
|
|
44
|
-
function shouldIgnoreError(url, status, e) {
|
|
45
|
-
if (status === 999 && url.startsWith("https://www.linkedin.com")) {
|
|
46
|
-
return true;
|
|
47
|
-
}
|
|
48
|
-
if ((status === -1 || status === 302) && url.startsWith("https://www.facebook.com/sharer/")) {
|
|
49
|
-
return true;
|
|
50
|
-
}
|
|
51
|
-
if (status === -3) {
|
|
52
|
-
return true;
|
|
53
|
-
}
|
|
54
|
-
|
|
55
|
-
return false;
|
|
56
|
-
}
|
|
57
|
-
|
|
58
|
-
/**
|
|
59
|
-
*
|
|
60
57
|
* @param errors {UrlCheckResponse[]}
|
|
61
58
|
* @return {string}
|
|
62
59
|
*/
|
|
63
60
|
function createErrorList(errors) {
|
|
64
|
-
return errors
|
|
61
|
+
return errors
|
|
62
|
+
.map(
|
|
63
|
+
(err) =>
|
|
64
|
+
`${createTabSpace(3)}${err.url}${createTabSpace()}${err.status}${err.message ? ` – ${err.message}` : ""}`,
|
|
65
|
+
)
|
|
66
|
+
.join("\n");
|
|
65
67
|
}
|
|
66
68
|
|
|
67
69
|
/**
|
|
68
|
-
*
|
|
69
70
|
* @param errors {UrlCheckResponse[]}
|
|
70
71
|
* @return {string}
|
|
71
72
|
*/
|
|
@@ -73,13 +74,13 @@ function createErrorResult(errors) {
|
|
|
73
74
|
let parentPages = "";
|
|
74
75
|
let nestedPages = "";
|
|
75
76
|
|
|
76
|
-
const parentPagesErrors = errors.filter(url => url.parentUrl === undefined);
|
|
77
|
+
const parentPagesErrors = errors.filter((url) => url.parentUrl === undefined);
|
|
77
78
|
if (parentPagesErrors.length > 0) {
|
|
78
79
|
parentPages = `${createTabSpace()}Pages from sitemap:\n${createErrorList(parentPagesErrors)}\n`;
|
|
79
80
|
}
|
|
80
81
|
|
|
81
82
|
const nestedPagesErrors = errors
|
|
82
|
-
.filter(url => url.parentUrl !== undefined)
|
|
83
|
+
.filter((url) => url.parentUrl !== undefined)
|
|
83
84
|
.sort((prev, curr) => prev.parentUrl.localeCompare(curr.parentUrl));
|
|
84
85
|
for (let i = 0; i < nestedPagesErrors.length; i++) {
|
|
85
86
|
if (i === 0) {
|
|
@@ -92,8 +93,8 @@ function createErrorResult(errors) {
|
|
|
92
93
|
nestedPages += `${createTabSpace(1)}Page: ${nestedPagesErrors[i].parentUrl}\n`;
|
|
93
94
|
}
|
|
94
95
|
}
|
|
95
|
-
const images = nestedPagesErrors.filter(err => err.parentUrl === nestedPagesErrors[i].parentUrl && err.isImg);
|
|
96
|
-
const links = nestedPagesErrors.filter(err => err.parentUrl === nestedPagesErrors[i].parentUrl && !err.isImg);
|
|
96
|
+
const images = nestedPagesErrors.filter((err) => err.parentUrl === nestedPagesErrors[i].parentUrl && err.isImg);
|
|
97
|
+
const links = nestedPagesErrors.filter((err) => err.parentUrl === nestedPagesErrors[i].parentUrl && !err.isImg);
|
|
97
98
|
if (images.length > 0) {
|
|
98
99
|
nestedPages += `${createTabSpace(2)}${IMAGES_LABEL}\n${createErrorList(images)}\n`;
|
|
99
100
|
}
|
|
@@ -106,7 +107,21 @@ function createErrorResult(errors) {
|
|
|
106
107
|
}
|
|
107
108
|
|
|
108
109
|
/**
|
|
109
|
-
*
|
|
110
|
+
* @param skippedUrls {UrlCheckResponse[]}
|
|
111
|
+
* @return {string}
|
|
112
|
+
*/
|
|
113
|
+
function createSkippedResult(skippedUrls) {
|
|
114
|
+
const flattened = skippedUrls.flatMap((url) => new URL(url.url).origin);
|
|
115
|
+
const unique = [...new Set(flattened)];
|
|
116
|
+
|
|
117
|
+
if (unique.length > 0) {
|
|
118
|
+
return `${unique.map((err) => `${createTabSpace(2)}${err}`).join("\n")}\n`;
|
|
119
|
+
}
|
|
120
|
+
|
|
121
|
+
return "";
|
|
122
|
+
}
|
|
123
|
+
|
|
124
|
+
/**
|
|
110
125
|
* @param incorrectLinks {string[]}
|
|
111
126
|
* @param webUrl {string}
|
|
112
127
|
* @return {string[]}
|
|
@@ -122,23 +137,83 @@ function createCorrectLinks(incorrectLinks, webUrl) {
|
|
|
122
137
|
}
|
|
123
138
|
|
|
124
139
|
/**
|
|
125
|
-
*
|
|
126
140
|
* @param url {string}
|
|
141
|
+
* @returns {boolean}
|
|
142
|
+
*/
|
|
143
|
+
function validURL(url) {
|
|
144
|
+
try {
|
|
145
|
+
new URL(url);
|
|
146
|
+
|
|
147
|
+
return true;
|
|
148
|
+
} catch {
|
|
149
|
+
return false;
|
|
150
|
+
}
|
|
151
|
+
}
|
|
152
|
+
|
|
153
|
+
/**
|
|
154
|
+
* @param url {string}
|
|
155
|
+
* @param webUrl {string}
|
|
127
156
|
* @param parentUrl {string | undefined}
|
|
128
157
|
* @param ttl {number}
|
|
129
158
|
* @return {Promise<UrlCheckResponse>}
|
|
130
159
|
*/
|
|
131
|
-
async function fetchUrl(url, parentUrl = undefined, ttl = 1) {
|
|
160
|
+
async function fetchUrl(url, webUrl, parentUrl = undefined, ttl = 1) {
|
|
161
|
+
if (!validURL(url)) {
|
|
162
|
+
return {
|
|
163
|
+
url,
|
|
164
|
+
parentUrl,
|
|
165
|
+
isImg: isImageUrl(url),
|
|
166
|
+
ttl,
|
|
167
|
+
status: 0,
|
|
168
|
+
message: "invalid url: " + url,
|
|
169
|
+
skipped: false,
|
|
170
|
+
html: null,
|
|
171
|
+
};
|
|
172
|
+
}
|
|
173
|
+
|
|
132
174
|
try {
|
|
133
|
-
|
|
134
|
-
|
|
175
|
+
const origin = new URL(url).origin;
|
|
176
|
+
|
|
177
|
+
if (parentUrl && origin !== webUrl) {
|
|
178
|
+
await robotsParser.useRobotsFor(origin);
|
|
179
|
+
|
|
180
|
+
const canCrawl = await robotsParser.canCrawl(url);
|
|
181
|
+
|
|
182
|
+
if (!canCrawl) {
|
|
183
|
+
return {
|
|
184
|
+
url,
|
|
185
|
+
parentUrl,
|
|
186
|
+
isImg: isImageUrl(url),
|
|
187
|
+
ttl,
|
|
188
|
+
status: 0,
|
|
189
|
+
message: "blocked by robots.txt",
|
|
190
|
+
skipped: true,
|
|
191
|
+
html: null,
|
|
192
|
+
};
|
|
193
|
+
}
|
|
194
|
+
}
|
|
195
|
+
} catch (e) {
|
|
196
|
+
const errorStatus = await fetcher(new URL(url).origin + "/robots.txt", { isExternal: true, redirect: true })
|
|
197
|
+
.then((res) => (res.status === 200 ? -1 : res.status))
|
|
198
|
+
.catch((e) => e.response?.status);
|
|
135
199
|
|
|
136
|
-
|
|
137
|
-
|
|
138
|
-
|
|
200
|
+
return {
|
|
201
|
+
url,
|
|
202
|
+
parentUrl,
|
|
203
|
+
isImg: isImageUrl(url),
|
|
204
|
+
ttl,
|
|
205
|
+
status: errorStatus,
|
|
206
|
+
message: e.message,
|
|
207
|
+
skipped: errorStatus !== undefined,
|
|
208
|
+
html: null,
|
|
209
|
+
};
|
|
210
|
+
}
|
|
211
|
+
|
|
212
|
+
try {
|
|
213
|
+
const response = await fetcher(url, { redirect: !!parentUrl, isExternal: !url.includes(webUrl) });
|
|
139
214
|
|
|
140
|
-
if (status !== 200 && ttl < MAX_TTL) {
|
|
141
|
-
return await fetchUrl(
|
|
215
|
+
if (response.status !== 200 && ttl < MAX_TTL) {
|
|
216
|
+
return await fetchUrl(url, webUrl, parentUrl, ttl + 1);
|
|
142
217
|
}
|
|
143
218
|
|
|
144
219
|
return {
|
|
@@ -146,46 +221,36 @@ async function fetchUrl(url, parentUrl = undefined, ttl = 1) {
|
|
|
146
221
|
parentUrl,
|
|
147
222
|
isImg: isImageUrl(url),
|
|
148
223
|
ttl,
|
|
149
|
-
status,
|
|
150
|
-
|
|
224
|
+
status: response.status,
|
|
225
|
+
skipped: false,
|
|
226
|
+
html: await response.text(),
|
|
151
227
|
};
|
|
152
228
|
} catch (e) {
|
|
153
229
|
const status = Number.parseInt((e && e.response && e.response.status) || -1, 10);
|
|
154
230
|
|
|
155
|
-
if (shouldIgnoreError(url, status, e)) {
|
|
156
|
-
return {
|
|
157
|
-
url,
|
|
158
|
-
parentUrl,
|
|
159
|
-
isImg: isImageUrl(url),
|
|
160
|
-
ttl,
|
|
161
|
-
status,
|
|
162
|
-
shouldIgnoreError: true,
|
|
163
|
-
time: 0,
|
|
164
|
-
};
|
|
165
|
-
}
|
|
166
231
|
return {
|
|
167
232
|
url,
|
|
168
233
|
parentUrl,
|
|
169
234
|
isImg: isImageUrl(url),
|
|
170
235
|
ttl,
|
|
171
236
|
status,
|
|
172
|
-
time: 0,
|
|
173
237
|
message: e.message,
|
|
174
|
-
|
|
238
|
+
skipped: false,
|
|
239
|
+
html: null,
|
|
175
240
|
};
|
|
176
241
|
}
|
|
177
242
|
}
|
|
178
243
|
|
|
179
244
|
/**
|
|
180
|
-
*
|
|
181
245
|
* @param url {string}
|
|
246
|
+
* @param webUrl {string}
|
|
182
247
|
* @param parentUrl {string | undefined}
|
|
183
248
|
* @return {UrlCheckResponse}
|
|
184
249
|
*/
|
|
185
|
-
async function testUrl(url, parentUrl = undefined) {
|
|
186
|
-
const indexInChecked = TESTED_URLS.findIndex(result => result.url === url);
|
|
250
|
+
async function testUrl(url, webUrl, parentUrl = undefined) {
|
|
251
|
+
const indexInChecked = TESTED_URLS.findIndex((result) => result.url === url);
|
|
187
252
|
if (indexInChecked === -1) {
|
|
188
|
-
const result = await fetchUrl(url, parentUrl);
|
|
253
|
+
const result = await fetchUrl(url, webUrl, parentUrl);
|
|
189
254
|
TESTED_URLS.push(result);
|
|
190
255
|
return result;
|
|
191
256
|
}
|
|
@@ -193,99 +258,106 @@ async function testUrl(url, parentUrl = undefined) {
|
|
|
193
258
|
}
|
|
194
259
|
|
|
195
260
|
/**
|
|
196
|
-
*
|
|
197
261
|
* @param urls {string[]}
|
|
198
262
|
* @param webUrl {string}
|
|
199
263
|
* @param sitemapUrl {string}
|
|
200
264
|
* @param skip {number}
|
|
201
|
-
* @param
|
|
265
|
+
* @param withNested {boolean}
|
|
266
|
+
* @param withImages {boolean}
|
|
202
267
|
* @return {Promise<void>}
|
|
203
268
|
*/
|
|
204
|
-
async function testSitemapUrls(urls, webUrl, sitemapUrl, skip,
|
|
269
|
+
async function testSitemapUrls(urls, webUrl, sitemapUrl, skip, withNested, withImages) {
|
|
270
|
+
for (let i = skip || 0; i < urls.length; i++) {
|
|
271
|
+
URLS_TO_CHECK.add(urls[i]);
|
|
272
|
+
}
|
|
273
|
+
|
|
205
274
|
for (let i = skip || 0; i < urls.length; i++) {
|
|
206
275
|
const url = urls[i];
|
|
207
|
-
const changedUrl = webUrl ? `${webUrl}${new URL(url).pathname}` :
|
|
276
|
+
const changedUrl = webUrl ? `${webUrl}${new URL(url).pathname}` : url;
|
|
208
277
|
|
|
209
|
-
|
|
278
|
+
const result = await testUrl(changedUrl, webUrl);
|
|
279
|
+
printProgress();
|
|
210
280
|
|
|
211
|
-
|
|
212
|
-
|
|
281
|
+
if (withNested && result.status === 200) {
|
|
282
|
+
await testNestedUrls(result.html, changedUrl, i, webUrl ?? sitemapUrl.split("/").slice(0, 3).join("/"));
|
|
283
|
+
}
|
|
213
284
|
|
|
214
|
-
if (
|
|
215
|
-
await
|
|
285
|
+
if (withImages && result.status === 200) {
|
|
286
|
+
await testNestedImages(result.html, changedUrl, i, webUrl ?? sitemapUrl.split("/").slice(0, 3).join("/"));
|
|
216
287
|
}
|
|
217
288
|
}
|
|
218
289
|
}
|
|
219
290
|
|
|
220
291
|
/**
|
|
221
|
-
*
|
|
292
|
+
* @param html {string}
|
|
222
293
|
* @param parentUrl {string}
|
|
223
294
|
* @param parentIndex {number}
|
|
224
295
|
* @param webUrl {string}
|
|
225
296
|
* @return {Promise<void>}
|
|
226
297
|
*/
|
|
227
|
-
async function
|
|
228
|
-
const
|
|
229
|
-
const $ = cheerio.load(data);
|
|
298
|
+
async function testNestedUrls(html, parentUrl, parentIndex, webUrl) {
|
|
299
|
+
const $ = cheerio.load(html);
|
|
230
300
|
const urls = createCorrectLinks(
|
|
231
301
|
$("a[href]").map((i, node) => $(node).attr("href")),
|
|
232
302
|
webUrl,
|
|
233
303
|
);
|
|
304
|
+
|
|
305
|
+
await testNested(urls, parentIndex, parentUrl, createTabSpace() + URLS_LABEL, webUrl);
|
|
306
|
+
}
|
|
307
|
+
|
|
308
|
+
/**
|
|
309
|
+
* @param html {string}
|
|
310
|
+
* @param parentUrl {string}
|
|
311
|
+
* @param parentIndex {number}
|
|
312
|
+
* @param webUrl {string}
|
|
313
|
+
* @return {Promise<void>}
|
|
314
|
+
*/
|
|
315
|
+
async function testNestedImages(html, parentUrl, parentIndex, webUrl) {
|
|
316
|
+
const $ = cheerio.load(html);
|
|
234
317
|
const images = createCorrectLinks(
|
|
235
318
|
$("img[src]").map((i, node) => $(node).attr("src")),
|
|
236
319
|
webUrl,
|
|
237
320
|
);
|
|
238
321
|
|
|
239
|
-
await testNested(images, parentIndex, parentUrl, createTabSpace() + IMAGES_LABEL);
|
|
240
|
-
await testNested(urls, parentIndex, parentUrl, createTabSpace() + URLS_LABEL);
|
|
322
|
+
await testNested(images, parentIndex, parentUrl, createTabSpace() + IMAGES_LABEL, webUrl);
|
|
241
323
|
}
|
|
242
324
|
|
|
243
325
|
/**
|
|
244
|
-
*
|
|
245
326
|
* @param urls {string[]}
|
|
246
327
|
* @param parentIndex {number}
|
|
247
328
|
* @param parentUrl {string}
|
|
248
329
|
* @param label {string}
|
|
330
|
+
* @param webUrl {string}
|
|
249
331
|
* @return {Promise<void>}
|
|
250
332
|
*/
|
|
251
|
-
async function testNested(urls, parentIndex, parentUrl, label) {
|
|
333
|
+
async function testNested(urls, parentIndex, parentUrl, label, webUrl) {
|
|
252
334
|
if (urls.length === 0) {
|
|
253
335
|
return;
|
|
254
336
|
}
|
|
255
337
|
|
|
256
|
-
|
|
338
|
+
for (let i = 0 || 0; i < urls.length; i++) {
|
|
339
|
+
URLS_TO_CHECK.add(urls[i]);
|
|
340
|
+
}
|
|
341
|
+
|
|
257
342
|
for (let i = 0; i < urls.length; i++) {
|
|
258
|
-
if (TESTED_URLS.findIndex(result => result.url === urls[i]) !== -1) {
|
|
343
|
+
if (TESTED_URLS.findIndex((result) => result.url === urls[i]) !== -1) {
|
|
259
344
|
continue;
|
|
260
345
|
}
|
|
261
|
-
|
|
262
|
-
|
|
263
|
-
printUrlResult(result);
|
|
346
|
+
await testUrl(urls[i], webUrl, parentUrl);
|
|
347
|
+
printProgress();
|
|
264
348
|
}
|
|
265
349
|
}
|
|
266
350
|
|
|
267
|
-
|
|
268
|
-
|
|
269
|
-
|
|
270
|
-
* @param urlIndex {number}
|
|
271
|
-
* @param allUrlsCount {number}
|
|
272
|
-
* @param prefix {string}
|
|
273
|
-
*/
|
|
274
|
-
function printUrlInfo(url, urlIndex, allUrlsCount, prefix = "") {
|
|
275
|
-
stdout.write(`${prefix}${urlIndex + 1} / ${allUrlsCount}${createTabSpace()}${url}`);
|
|
276
|
-
}
|
|
351
|
+
function printProgress() {
|
|
352
|
+
stdout.clearLine(0);
|
|
353
|
+
stdout.cursorTo(0);
|
|
277
354
|
|
|
278
|
-
|
|
279
|
-
|
|
280
|
-
|
|
281
|
-
*/
|
|
282
|
-
function printUrlResult(result) {
|
|
283
|
-
const { ttl, status, time, message } = result;
|
|
284
|
-
stdout.write(`${createTabSpace()}${status}${message ? " – " + message : ""} (${time}ms) ttl=${ttl} ${status === 200 ? "✅ " : "❌ "}\n`);
|
|
355
|
+
stdout.write(`Completed: ${TESTED_URLS.length}/${URLS_TO_CHECK.size}${createTabSpace(2)}`);
|
|
356
|
+
stdout.write(`Skipped: ${TESTED_URLS.filter((u) => u.skipped).length}${createTabSpace(2)}`);
|
|
357
|
+
stdout.write(`Errors: ${TESTED_URLS.filter((u) => u.status !== 200 && u.skipped === false).length}`);
|
|
285
358
|
}
|
|
286
359
|
|
|
287
360
|
/**
|
|
288
|
-
*
|
|
289
361
|
* @param errorText {string}
|
|
290
362
|
* @param title {string}
|
|
291
363
|
*/
|
|
@@ -295,7 +367,6 @@ function logErrors(errorText, title) {
|
|
|
295
367
|
}
|
|
296
368
|
|
|
297
369
|
/**
|
|
298
|
-
*
|
|
299
370
|
* @param millis {number}
|
|
300
371
|
* @return {string}
|
|
301
372
|
*/
|
|
@@ -306,105 +377,82 @@ function convertTime(millis) {
|
|
|
306
377
|
}
|
|
307
378
|
|
|
308
379
|
/**
|
|
309
|
-
*
|
|
310
380
|
* @param okResults {UrlCheckResponse[]}
|
|
311
381
|
* @param time {number}
|
|
312
382
|
*/
|
|
313
383
|
function logStatistics(okResults, time) {
|
|
314
|
-
const avgTime = Math.round(okResults.reduce((prev, curr) => prev + curr.time, 0) / TESTED_URLS.length);
|
|
315
|
-
const maxTime = okResults.reduce((prev, curr) => (curr.time > prev.time ? curr : prev), []);
|
|
316
|
-
const minTime = okResults.reduce((prev, curr) => (curr.time < prev.time ? curr : prev), []);
|
|
317
|
-
|
|
318
384
|
stdout.write("\nSummary:\n");
|
|
319
385
|
stdout.write(createTabSpace() + `Time ${convertTime(time)}\n`);
|
|
320
386
|
stdout.write(
|
|
321
|
-
createTabSpace() + "Images tested:" + createTabSpace() + TESTED_URLS.filter(url => url.isImg).length + "\n",
|
|
387
|
+
createTabSpace() + "Images tested:" + createTabSpace() + TESTED_URLS.filter((url) => url.isImg).length + "\n",
|
|
322
388
|
);
|
|
323
389
|
stdout.write(
|
|
324
|
-
createTabSpace() + "Links tested:" + createTabSpace() + TESTED_URLS.filter(url => !url.isImg).length + "\n",
|
|
390
|
+
createTabSpace() + "Links tested:" + createTabSpace() + TESTED_URLS.filter((url) => !url.isImg).length + "\n",
|
|
325
391
|
);
|
|
326
|
-
stdout.write(createTabSpace() + "Avg time:" + createTabSpace() + avgTime + "ms\n");
|
|
327
392
|
stdout.write(
|
|
328
|
-
createTabSpace() + "
|
|
393
|
+
createTabSpace() + "Skipped:" + createTabSpace() + TESTED_URLS.filter((url) => url.skipped).length + "\n",
|
|
329
394
|
);
|
|
330
395
|
stdout.write(
|
|
331
|
-
createTabSpace() +
|
|
396
|
+
createTabSpace() +
|
|
397
|
+
"Errors:" +
|
|
398
|
+
createTabSpace() +
|
|
399
|
+
TESTED_URLS.filter((url) => url.status !== 200 && url.skipped === false).length +
|
|
400
|
+
"\n",
|
|
332
401
|
);
|
|
333
402
|
}
|
|
334
403
|
|
|
335
404
|
/**
|
|
336
|
-
*
|
|
337
|
-
* @param errorText {string}
|
|
338
|
-
* @param slackChannel {string}
|
|
339
|
-
* @return {Promise<void>}
|
|
340
|
-
*/
|
|
341
|
-
async function sendSlackMessage(errorText, slackChannel) {
|
|
342
|
-
await Slack.chatPostMessage(slackChannel, {
|
|
343
|
-
text: ":warning: Odkazy uvedené v sitemap.xml nejsou dostupné",
|
|
344
|
-
attachments: [
|
|
345
|
-
{
|
|
346
|
-
text: errorText,
|
|
347
|
-
},
|
|
348
|
-
],
|
|
349
|
-
});
|
|
350
|
-
}
|
|
351
|
-
|
|
352
|
-
/**
|
|
353
|
-
*
|
|
354
405
|
* @param resultErrors {string}
|
|
355
406
|
* @return {Promise<void>}
|
|
356
407
|
*/
|
|
357
408
|
async function sendGoogleChatMessage(resultErrors) {
|
|
358
409
|
await GoogleChat.chatPostMessage({
|
|
359
|
-
text: resultErrors
|
|
410
|
+
text: resultErrors,
|
|
360
411
|
});
|
|
361
412
|
}
|
|
362
413
|
|
|
363
414
|
/**
|
|
364
|
-
*
|
|
365
415
|
* @param sitemapUrl {string}
|
|
366
|
-
* @param webUrl {string}
|
|
367
|
-
* @param slackChannel {string}
|
|
368
416
|
* @param skip {number}
|
|
369
|
-
* @param
|
|
417
|
+
* @param withNested {boolean}
|
|
418
|
+
* @param withImages {boolean}
|
|
370
419
|
* @return {Promise<*>}
|
|
371
420
|
*/
|
|
372
|
-
module.exports = async function run(sitemapUrl,
|
|
421
|
+
module.exports = async function run(sitemapUrl, skip, withNested, withImages) {
|
|
373
422
|
if (!sitemapUrl) {
|
|
374
423
|
stdout.write("⛔ Required parameter --url is empty.\n");
|
|
375
424
|
return process.exit(1);
|
|
376
425
|
}
|
|
377
426
|
|
|
378
|
-
|
|
379
|
-
|
|
380
|
-
return process.exit(1);
|
|
381
|
-
}
|
|
427
|
+
const url = new URL(sitemapUrl);
|
|
428
|
+
const webUrl = url.origin;
|
|
382
429
|
|
|
383
|
-
|
|
384
|
-
|
|
385
|
-
|
|
430
|
+
stdout.write(`${createTabSpace()}Sitemap url: ${sitemapUrl}\n`);
|
|
431
|
+
stdout.write(`${createTabSpace()}Web url: ${webUrl}\n\n`);
|
|
432
|
+
if (withNested) {
|
|
433
|
+
stdout.write(`${createTabSpace()}Will test nested links\n`);
|
|
434
|
+
}
|
|
435
|
+
if (withImages) {
|
|
436
|
+
stdout.write(`${createTabSpace()}Will test images\n\n`);
|
|
386
437
|
}
|
|
387
438
|
|
|
388
439
|
const startTime = performance.now();
|
|
389
|
-
await testSitemapUrls(
|
|
440
|
+
await testSitemapUrls(await Sitemap.getSitemap(sitemapUrl), webUrl, sitemapUrl, skip, withNested, withImages);
|
|
390
441
|
const finishTime = performance.now();
|
|
391
442
|
|
|
392
|
-
const errors = TESTED_URLS.filter(r => r.status !== 200 && r.
|
|
393
|
-
const
|
|
394
|
-
const ok = TESTED_URLS.filter(r => r.status === 200);
|
|
443
|
+
const errors = TESTED_URLS.filter((r) => r.status !== 200 && r.skipped === false);
|
|
444
|
+
const skippedUrls = TESTED_URLS.filter((r) => r.status !== 200 && r.skipped === true);
|
|
445
|
+
const ok = TESTED_URLS.filter((r) => r.status === 200);
|
|
395
446
|
|
|
396
447
|
if (errors.length > 0) {
|
|
397
448
|
const errorText = createErrorResult(errors);
|
|
398
|
-
logErrors(errorText, "\nErrors:\n");
|
|
399
|
-
const ignoredErrorText = createErrorResult(ignoredErrors);
|
|
400
|
-
logErrors(ignoredErrorText, "\nIngored errors:\n");
|
|
401
|
-
await sendSlackMessage(errorText, slackChannel);
|
|
449
|
+
logErrors(errorText, "\n\n\nErrors:\n");
|
|
402
450
|
await sendGoogleChatMessage(errorText);
|
|
403
451
|
}
|
|
404
452
|
|
|
405
|
-
if (
|
|
406
|
-
const
|
|
407
|
-
logErrors(
|
|
453
|
+
if (skippedUrls.length > 0) {
|
|
454
|
+
const skippedUrlsText = createSkippedResult(skippedUrls);
|
|
455
|
+
logErrors(skippedUrlsText, "\nSkipped origins:\n");
|
|
408
456
|
}
|
|
409
457
|
|
|
410
458
|
logStatistics(ok, Math.ceil(finishTime - startTime));
|